diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 306c57f..038dd3f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -461,6 +461,7 @@ MERGE_TOO_MANY_UPDATE(10406, "MERGE statment can have at most 1 WHEN MATCHED ... UPDATE clause: <{0}>", true), INVALID_JOIN_CONDITION(10407, "Error parsing condition in outer join"), INVALID_TARGET_COLUMN_IN_SET_CLAUSE(10408, "Target column \"{0}\" of set clause is not found in table \"{1}\".", true), + HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY(10409, "Expression in GROUPING function not present in GROUP BY"), //========================== 20000 range starts here ========================// SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."), SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. " diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 87330ed..6f01da0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -342,6 +342,8 @@ system.registerUDF("shiftright", UDFOPBitShiftRight.class, true); system.registerUDF("shiftrightunsigned", UDFOPBitShiftRightUnsigned.class, true); + system.registerGenericUDF("grouping", GenericUDFGrouping.class); + system.registerGenericUDF("current_database", UDFCurrentDB.class); system.registerGenericUDF("current_date", GenericUDFCurrentDate.class); system.registerGenericUDF("current_timestamp", GenericUDFCurrentTimestamp.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index f28d33e..cddf14f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -31,8 +31,6 @@ import java.util.Map; import java.util.Set; -import javolution.util.FastBitSet; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -64,8 +62,11 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; +import javolution.util.FastBitSet; + /** * GroupBy operator implementation. */ @@ -127,7 +128,7 @@ private transient int groupingSetsPosition; // position of grouping set, generally the last of keys private transient List groupingSets; // declared grouping set values private transient FastBitSet[] groupingSetsBitSet; // bitsets acquired from grouping set values - private transient Text[] newKeysGroupingSets; + private transient IntWritable[] newKeysGroupingSets; // for these positions, some variable primitive type (String) is used, so size // cannot be estimated. sample it at runtime. 
@@ -218,13 +219,13 @@ protected void initializeOp(Configuration hconf) throws HiveException { if (groupingSetsPresent) { groupingSets = conf.getListGroupingSets(); groupingSetsPosition = conf.getGroupingSetPosition(); - newKeysGroupingSets = new Text[groupingSets.size()]; + newKeysGroupingSets = new IntWritable[groupingSets.size()]; groupingSetsBitSet = new FastBitSet[groupingSets.size()]; int pos = 0; for (Integer groupingSet: groupingSets) { // Create the mapping corresponding to the grouping set - newKeysGroupingSets[pos] = new Text(String.valueOf(groupingSet)); + newKeysGroupingSets[pos] = new IntWritable(groupingSet); groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet); pos++; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f1f3bf9..16df496 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -81,7 +81,6 @@ import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; -import org.apache.calcite.rel.rules.SemiJoinRule; import org.apache.calcite.rel.rules.UnionMergeRule; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; @@ -109,6 +108,7 @@ import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.ImmutableIntList; import org.apache.calcite.util.Pair; +import org.apache.commons.lang.mutable.MutableBoolean; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; @@ -240,6 +240,7 @@ import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; +import com.google.common.math.IntMath; public class CalcitePlanner extends SemanticAnalyzer { @@ -2964,7 +2965,16 @@ private RelNode genSelectForWindowing(QB qb, RelNode srcRel, HashSet // 4. 
Walk through Window Expressions & Construct RexNodes for those,
     // Update out_rwsch
+    final QBParseInfo qbp = getQBParseInfo(qb);
+    final String selClauseName = qbp.getClauseNames().iterator().next();
+    final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
+        || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
     for (WindowExpressionSpec wExprSpec : windowExpressions) {
+      if (cubeRollupGrpSetPresent) {
+        // Special handling of grouping function
+        wExprSpec.setExpression(rewriteGroupingFunctionAST(
+            getGroupByForClause(qbp, selClauseName), wExprSpec.getExpression()));
+      }
       if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
         Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
         projsForWindowSelOp.add(wtp.getKey());
@@ -3067,6 +3077,9 @@ private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel)
     String selClauseName = qbp.getClauseNames().iterator().next();
     ASTNode selExprList = qbp.getSelForClause(selClauseName);
+    final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
+        || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
+
     // 2.Row resolvers for input, output
     RowResolver out_rwsch = new RowResolver();
     Integer pos = Integer.valueOf(0);
@@ -3238,6 +3251,10 @@ private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel)
       TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
       // We allow stateful functions in the SELECT list (but nowhere else)
       tcCtx.setAllowStatefulFunctions(true);
+      if (cubeRollupGrpSetPresent) {
+        // Special handling of grouping function
+        expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr);
+      }
       ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
       String recommended = recommendName(exp, colAlias);
       if (recommended != null && out_rwsch.get(null, recommended) == null) {
@@ -3604,6 +3621,7 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map
+  protected ASTNode rewriteGroupingFunctionAST(final List<ASTNode> grpByAstExprs, ASTNode targetNode)
+      throws SemanticException {
+    final MutableBoolean visited = new MutableBoolean(false);
+    final MutableBoolean found = new MutableBoolean(false);
+
+    TreeVisitorAction action = new TreeVisitorAction() {
+
+      @Override
+      public Object pre(Object t) {
+        return t;
+      }
+
+      @Override
+      public Object post(Object t) {
+        ASTNode root = (ASTNode) t;
+        if (root.getType() == HiveParser.TOK_FUNCTION && root.getChildCount() == 2) {
+          ASTNode func = (ASTNode) ParseDriver.adaptor.getChild(root, 0);
+          if (func.getText().equals("grouping")) {
+            ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, 1);
+            visited.setValue(true);
+            for (int i = 0; i < grpByAstExprs.size(); i++) {
+              ASTNode grpByExpr = grpByAstExprs.get(i);
+              if (grpByExpr.toStringTree().equals(c.toStringTree())) {
+                ASTNode child1 = (ASTNode) ParseDriver.adaptor.create(
+                        HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
+                ParseDriver.adaptor.addChild(child1, ParseDriver.adaptor.create(
+                        HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
+                ASTNode child2 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
+                        String.valueOf(IntMath.mod(-i, grpByAstExprs.size())));
+                root.setChild(1, child1);
+                root.addChild(child2);
+                found.setValue(true);
+                break;
+              }
+            }
+          }
+        }
+        return t;
+      }
+    };
+    ASTNode newTargetNode = (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
+    if (visited.booleanValue() && !found.booleanValue()) {
+      throw new SemanticException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
+    }
+    return newTargetNode;
+  }
+
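[Reviewer note] A minimal, standalone sketch of the bit arithmetic that the rewrite above and GenericUDFGrouping.evaluate() rely on; the class name GroupingBitDemo and the sample grouping id are made up for illustration, and this is not part of the patch. The rewrite turns grouping(e_i) into grouping(GROUPING__ID, IntMath.mod(-i, n)) for the i-th of n GROUP BY expressions, and the UDF then extracts that bit: per its description, 1 means the column is aggregated in that row, 0 means it is not.

```java
// Hypothetical demo; mirrors only the bit arithmetic, not Hive's ObjectInspector plumbing.
import com.google.common.math.IntMath;

public class GroupingBitDemo {

  // Mirrors GenericUDFGrouping.evaluate(): extract one bit of the grouping id.
  static int grouping(int groupingId, int index) {
    return (groupingId >> index) & 1;
  }

  public static void main(String[] args) {
    // Index assigned by rewriteGroupingFunctionAST to the i-th of n GROUP BY expressions.
    int n = 2; // e.g. GROUP BY ROLLUP(key, value)
    for (int i = 0; i < n; i++) {
      System.out.println("expr " + i + " -> bit " + IntMath.mod(-i, n));
    }
    // Sample (made-up) grouping id with bit 1 set: grouping() reports 1
    // ("aggregated") for the expression mapped to bit 1, and 0 for bit 0.
    int groupingId = 0b10;
    System.out.println(grouping(groupingId, 0)); // 0
    System.out.println(grouping(groupingId, 1)); // 1
  }
}
```

The new groupby_grouping_sets_grouping.q test exercises this same arithmetic end to end, both with and without CBO.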
private Operator genPlanForSubQueryPredicate( QB qbSQ, ISubQueryJoinInfo subQueryPredicate) throws SemanticException { @@ -4101,6 +4163,9 @@ static boolean isRegex(String pattern, HiveConf conf) { startPosn = 0; } + final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty() + || !qb.getParseInfo().getDestGroupingSets().isEmpty() + || !qb.getParseInfo().getDestCubes().isEmpty()); Set colAliases = new HashSet(); ASTNode[] exprs = new ASTNode[exprList.getChildCount()]; String[][] aliases = new String[exprList.getChildCount()][]; @@ -4186,6 +4251,11 @@ static boolean isRegex(String pattern, HiveConf conf) { // We allow stateful functions in the SELECT list (but nowhere else) tcCtx.setAllowStatefulFunctions(true); tcCtx.setAllowDistinctFunctions(false); + if (!isCBOExecuted() && cubeRollupGrpSetPresent) { + // If CBO did not optimize the query, we might need to replace grouping function + // Special handling of grouping function + expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr); + } ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); String recommended = recommendName(exp, colAlias); if (recommended != null && !colAliases.contains(recommended) && @@ -4684,7 +4754,7 @@ private void addGroupingSetKey(List groupByKeys, // For grouping sets, add a dummy grouping key String groupingSetColumnName = groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); - ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, + ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, groupingSetColumnName, null, false); groupByKeys.add(inputExpr); @@ -4693,7 +4763,7 @@ private void addGroupingSetKey(List groupByKeys, groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), new ColumnInfo( field, - TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.intTypeInfo, null, true)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); @@ -4715,7 +4785,7 @@ private void processGroupingSetReduceSinkOperator(RowResolver reduceSinkInputRow // add a key for reduce sink String groupingSetColumnName = reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); - ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, + ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, groupingSetColumnName, null, false); reduceKeys.add(inputExpr); @@ -4947,14 +5017,14 @@ private void createNewGroupingKey(List groupByKeys, Map colExprMap) { // The value for the constant does not matter. 
It is replaced by the grouping set // value for the actual implementation - ExprNodeConstantDesc constant = new ExprNodeConstantDesc("0"); + ExprNodeConstantDesc constant = new ExprNodeConstantDesc(0); groupByKeys.add(constant); String field = getColumnInternalName(groupByKeys.size() - 1); outputColumnNames.add(field); groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), new ColumnInfo( field, - TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.intTypeInfo, null, true)); colExprMap.put(field, constant); @@ -9471,7 +9541,7 @@ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb, } if(queryProperties.hasWindowing() && qb.getWindowingSpec(dest) != null) { - curr = genWindowingPlan(qb.getWindowingSpec(dest), curr); + curr = genWindowingPlan(qb, qb.getWindowingSpec(dest), curr); // GBy for DISTINCT after windowing if ((qbp.getAggregationExprsForClause(dest).size() != 0 || getGroupByForClause(qbp, dest).size() > 0) @@ -12972,8 +13042,24 @@ private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operato //--------------------------- Windowing handling: PTFInvocationSpec to PTFDesc -------------------- - Operator genWindowingPlan(WindowingSpec wSpec, Operator input) throws SemanticException { + Operator genWindowingPlan(QB qb, WindowingSpec wSpec, Operator input) throws SemanticException { wSpec.validateAndMakeEffective(); + + if (!isCBOExecuted()) { + // If CBO did not optimize the query, we might need to replace grouping function + final String selClauseName = qb.getParseInfo().getClauseNames().iterator().next(); + final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty() + || !qb.getParseInfo().getDestGroupingSets().isEmpty() + || !qb.getParseInfo().getDestCubes().isEmpty()); + if (cubeRollupGrpSetPresent) { + for (WindowExpressionSpec wExprSpec : wSpec.getWindowExpressions()) { + // Special handling of grouping function + wExprSpec.setExpression(rewriteGroupingFunctionAST( + getGroupByForClause(qb.getParseInfo(), selClauseName), wExprSpec.getExpression())); + } + } + } + WindowingComponentizer groups = new WindowingComponentizer(wSpec); RowResolver rr = opParseCtx.get(input).getRowResolver(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java new file mode 100644 index 0000000..cc01526 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector;
+
+/**
+ * UDF grouping
+ */
+@Description(name = "grouping",
+value = "_FUNC_(a, b) - Indicates whether a specified column expression "
++ "is aggregated or not. Returns 1 for aggregated or 0 for not aggregated. ",
+extended = "a is the grouping id, b is the index we want to extract")
+@UDFType(deterministic = true)
+@NDV(maxNdv = 2)
+public class GenericUDFGrouping extends GenericUDF {
+
+  private transient IntObjectInspector groupingIdOI;
+  private int index = 0;
+  private ByteWritable byteWritable = new ByteWritable();
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 2) {
+      throw new UDFArgumentLengthException(
+        "grouping() requires 2 arguments, got " + arguments.length);
+    }
+
+    if (arguments[0].getCategory() != Category.PRIMITIVE) {
+      throw new UDFArgumentTypeException(0, "The first argument to grouping() must be primitive");
+    }
+    PrimitiveObjectInspector arg1OI = (PrimitiveObjectInspector) arguments[0];
+    if (arg1OI.getPrimitiveCategory() != PrimitiveCategory.INT) {
+      throw new UDFArgumentTypeException(0, "The first argument to grouping() must be an integer");
+    }
+    groupingIdOI = (IntObjectInspector) arguments[0];
+
+    PrimitiveObjectInspector arg2OI = (PrimitiveObjectInspector) arguments[1];
+    if (!(arg2OI instanceof WritableConstantIntObjectInspector)) {
+      throw new UDFArgumentTypeException(1, "The second argument to grouping() must be a constant");
+    }
+    index = ((WritableConstantIntObjectInspector) arg2OI).getWritableConstantValue().get();
+
+    return PrimitiveObjectInspectorFactory.writableByteObjectInspector;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    // Extract the bit of the grouping id at the given index:
+    // 1 means the column is aggregated, 0 means it is not.
+    byteWritable.set((byte)
+      ((PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI) >> index) & 1));
+    return byteWritable;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert (children.length == 2);
+    return getStandardDisplayString("grouping", children);
+  }
+
+}
diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
new file mode 100644
index 0000000..1b753e1
---
/dev/null +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q @@ -0,0 +1,89 @@ +CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1; + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +set hive.cbo.enable=false; + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; diff --git ql/src/test/queries/clientpositive/perf/query22.q ql/src/test/queries/clientpositive/perf/query22.q index 17cece2..adb509d 100644 --- ql/src/test/queries/clientpositive/perf/query22.q +++ ql/src/test/queries/clientpositive/perf/query22.q @@ -1 +1,21 @@ -explain select i_product_name ,i_brand ,i_class ,i_category ,avg(inv_quantity_on_hand) qoh from inventory ,date_dim ,item ,warehouse where inventory.inv_date_sk=date_dim.d_date_sk and inventory.inv_item_sk=item.i_item_sk and inventory.inv_warehouse_sk = warehouse.w_warehouse_sk and date_dim.d_month_seq between 1193 and 1193 + 11 group by i_product_name ,i_brand ,i_class ,i_category with rollup order by qoh, i_product_name, i_brand, i_class, i_category limit 100; +explain +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100; + diff --git ql/src/test/queries/clientpositive/perf/query27.q 
ql/src/test/queries/clientpositive/perf/query27.q index 58be664..0cbb3ae 100644 --- ql/src/test/queries/clientpositive/perf/query27.q +++ ql/src/test/queries/clientpositive/perf/query27.q @@ -1 +1,22 @@ -explain select i_item_id, s_state, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, store, item where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and customer_demographics.cd_gender = 'F' and customer_demographics.cd_marital_status = 'D' and customer_demographics.cd_education_status = 'Unknown' and date_dim.d_year = 1998 and store.s_state in ('KS','AL', 'MN', 'AL', 'SC', 'VT') group by i_item_id, s_state order by i_item_id ,s_state limit 100; +explain +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100; + diff --git ql/src/test/queries/clientpositive/perf/query36.q ql/src/test/queries/clientpositive/perf/query36.q new file mode 100644 index 0000000..6c3a945 --- /dev/null +++ ql/src/test/queries/clientpositive/perf/query36.q @@ -0,0 +1,29 @@ +explain +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100; + diff --git ql/src/test/queries/clientpositive/perf/query67.q ql/src/test/queries/clientpositive/perf/query67.q index 56ef907..ca2fc61 100644 --- ql/src/test/queries/clientpositive/perf/query67.q +++ ql/src/test/queries/clientpositive/perf/query67.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; -explain + +explain select * from (select i_category ,i_class @@ -24,11 +25,11 @@ from (select i_category ,date_dim ,store ,item - where store_sales.ss_sold_date_sk=date_dim.d_date_sk - and store_sales.ss_item_sk=item.i_item_sk - and store_sales.ss_store_sk = store.s_store_sk - and d_month_seq between 1193 and 1193+11 - group by i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id with rollup)dw1) dw2 + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 where rk <= 100 order by i_category ,i_class @@ -42,4 
+43,3 @@ order by i_category ,rk limit 100; - diff --git ql/src/test/queries/clientpositive/perf/query70.q ql/src/test/queries/clientpositive/perf/query70.q index 07d68e7..80f8c23 100644 --- ql/src/test/queries/clientpositive/perf/query70.q +++ ql/src/test/queries/clientpositive/perf/query70.q @@ -1,2 +1,39 @@ set hive.mapred.mode=nonstrict; -explain select sum(ss_net_profit) as total_sum ,s_state ,s_county ,grouping__id as lochierarchy , rank() over(partition by grouping__id, case when grouping__id == 2 then s_state end order by sum(ss_net_profit)) as rank_within_parent from store_sales ss join date_dim d1 on d1.d_date_sk = ss.ss_sold_date_sk join store s on s.s_store_sk = ss.ss_store_sk where d1.d_month_seq between 1193 and 1193+11 and s.s_state in ( select s_state from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5 ) group by s_state,s_county with rollup order by lochierarchy desc ,case when lochierarchy = 0 then s_state end ,rank_within_parent limit 100; + +explain +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store s + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s.s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100; + diff --git ql/src/test/queries/clientpositive/perf/query86.q ql/src/test/queries/clientpositive/perf/query86.q new file mode 100644 index 0000000..07a9ec5 --- /dev/null +++ ql/src/test/queries/clientpositive/perf/query86.q @@ -0,0 +1,25 @@ +explain +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100; + diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index 99be3c1..d134d27 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -360,25 +360,25 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: 
COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File Output Operator compressed: false - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -414,25 +414,25 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -464,25 +464,25 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -518,25 +518,25 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File 
Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -572,25 +572,25 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -626,25 +626,25 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -680,25 +680,25 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File Output Operator compressed: false - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -791,25 +791,25 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE 
Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -900,18 +900,18 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE @@ -954,18 +954,18 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE @@ -1004,18 +1004,18 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), 
locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE @@ -1058,18 +1058,18 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE @@ -1112,18 +1112,18 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE @@ -1166,18 +1166,18 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column 
stats: NONE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE @@ -1220,18 +1220,18 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE @@ -1327,18 +1327,18 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: state (type: string), locid (type: int), '0' (type: string) + keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE diff 
--git ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index 9535344..5e5efa8 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -147,18 +147,18 @@ STAGE PLANS: outputColumnNames: state, country Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: state (type: string), country (type: string), '0' (type: string) + keys: state (type: string), country (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE @@ -313,25 +313,25 @@ STAGE PLANS: outputColumnNames: state, country Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), country (type: string), '0' (type: string) + keys: state (type: string), country (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 10320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7080 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 40 Data size: 10320 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 40 Data size: 7080 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -420,25 +420,25 @@ STAGE PLANS: outputColumnNames: state, country 
Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: state (type: string), country (type: string), '0' (type: string) + keys: state (type: string), country (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 80 Data size: 20640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 14160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 80 Data size: 20640 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 80 Data size: 14160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out index 0258bb8..9eea534 100644 --- ql/src/test/results/clientpositive/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -41,20 +41,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -100,20 +100,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column 
stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -185,25 +185,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -269,19 +269,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - 
Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -342,12 +342,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -355,7 +355,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -371,7 +371,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -379,7 +379,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -452,19 +452,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: partials 
outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -480,7 +480,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -488,7 +488,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: final outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -574,12 +574,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -590,7 +590,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -603,7 +603,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -619,7 +619,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -627,7 +627,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -663,7 +663,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: 
Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -671,7 +671,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -687,7 +687,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -695,7 +695,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out index 6eaef7e..f6e1b17 100644 --- ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out +++ ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out @@ -50,21 +50,21 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: key (type: string), value (type: string), '0' (type: string) + keys: key (type: string), value (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: key (type: string), value (type: string), '0' (type: string) + keys: key (type: string), value (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -76,12 +76,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string) + expressions: _col0 (type: string), _col2 
(type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -111,13 +111,13 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out index 09d52c0..39a9e6c 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out @@ -53,7 +53,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -69,15 +69,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -137,7 +137,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -153,15 +153,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) 
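Every hunk so far makes the same substitution: the synthetic grouping-set key moves from the string literal '0' (type: string) to the int literal 0 (type: int), and the key expressions, Map-reduce partition columns, and mergepartial keys follow suit. A minimal sketch, with illustrative names only, of emitting the per-grouping-set ids as int-typed keys — for CUBE(a, b) the ids are {0, 1, 2, 3} and for ROLLUP(a, b) they are {0, 1, 3}, matching the GROUPING__ID values in the new golden file further below:

import org.apache.hadoop.io.IntWritable;

// Illustrative sketch, not the Hive operator: materialize one int-typed
// key per grouping set, matching the plans' new `0 (type: int)` key column.
// The '-' lines above show the same column formerly carried as the string '0'.
public final class GroupingSetKeys {
  public static IntWritable[] keysFor(int[] groupingSetIds) {
    IntWritable[] keys = new IntWritable[groupingSetIds.length];
    for (int i = 0; i < groupingSetIds.length; i++) {
      keys[i] = new IntWritable(groupingSetIds[i]);
    }
    return keys;
  }
}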
Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -244,7 +244,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -260,15 +260,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: double) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -373,7 +373,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE @@ -389,15 +389,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index 04ece02..8428631 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -59,20 +59,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data 
size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(c), count() - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 4 Data size: 288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 288 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 2 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -118,20 +118,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(c), count() - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 4 Data size: 288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 288 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 2 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -219,7 +219,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 4 Data size: 288 Basic stats: COMPLETE Column stats: NONE @@ -235,15 +235,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) 
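The grouping-set id also acts as a mask over the GROUP BY keys: in the result rows further below, ROLLUP(key, value) produces GROUPING__ID = 1 with value NULL-ed out, id = 2 with key NULL-ed out, id = 3 with both kept, and id = 0 with both aggregated away. A hypothetical decoder consistent with that pattern, assuming bit i of the id marks the i-th GROUP BY column as participating — a sketch checked against this output, not Hive's internal bitset code:

import java.util.BitSet;

public final class GroupingSetMask {
  // Hypothetical helper: decode a grouping-set id into the set of key
  // positions that participate; keys whose bit is clear are emitted as
  // NULL for that copy of the row.
  public static BitSet toBitSet(int groupingSetId, int numKeys) {
    BitSet bits = new BitSet(numKeys);
    for (int i = 0; i < numKeys; i++) {
      if (((groupingSetId >> i) & 1) == 1) {
        bits.set(i);
      }
    }
    return bits;
  }
}

For example, toBitSet(1, 2) keeps only position 0 (key) and toBitSet(3, 2) keeps both, matching the id = 1 and id = 3 rows.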
Statistics: Num rows: 4 Data size: 288 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 2 Data size: 144 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out index 095113a..f688da3 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out @@ -56,20 +56,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -133,20 +133,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -203,20 +203,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column 
stats: NONE Group By Operator aggregations: count() - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -280,20 +280,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -398,7 +398,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -414,15 +414,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: 
count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -499,7 +499,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -515,15 +515,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out index e91bd41..de019e3 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out @@ -62,7 +62,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -78,15 +78,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -151,7 +151,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col0 (type: string), _col1 (type: string), 
'0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -167,15 +167,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -292,7 +292,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE @@ -308,15 +308,15 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out index 9b90dec..8166240 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out @@ -41,18 +41,18 @@ STAGE PLANS: predicate: (UDFToDouble(a) = 5.0) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: 
string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE @@ -112,18 +112,18 @@ STAGE PLANS: predicate: (UDFToDouble(a) = 5.0) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out new file mode 100644 index 0000000..dcc80f7 --- /dev/null +++ ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out @@ -0,0 +1,765 @@ +PREHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: 
int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0) (type: tinyint), grouping(_col2, 1) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 0 0 0 +1 NULL 1 1 0 +1 NULL 3 1 1 +1 1 3 1 1 +2 NULL 1 1 0 +2 2 3 1 1 +3 NULL 1 1 0 +3 NULL 3 1 1 +3 3 3 1 1 +4 NULL 1 1 0 +4 5 3 1 1 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: 
int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0) (type: tinyint), grouping(_col2, 1) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 0 0 0 +NULL NULL 2 0 1 +NULL 1 2 0 1 +NULL 2 2 0 1 +NULL 3 2 0 1 +NULL 5 2 0 1 +1 NULL 1 1 0 +1 NULL 3 1 1 +1 1 3 1 1 +2 NULL 1 1 0 +2 2 3 1 1 +3 NULL 1 1 0 +3 NULL 3 1 1 +3 3 3 1 1 +4 NULL 1 1 0 +4 5 3 1 1 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(grouping(_col2, 0)) = 1) (type: boolean) + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, 
value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL +1 NULL +1 1 +2 NULL +2 2 +3 NULL +3 NULL +3 3 +4 NULL +4 5 +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((UDFToInteger(grouping(_col2, 0)) = 1) or (UDFToInteger(grouping(_col2, 1)) = 1)) (type: boolean) + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 0) + grouping(_col2, 1)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int) + sort order: -+ + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 60 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +4 5 2 +3 3 2 +3 NULL 2 +2 2 2 +1 1 2 +1 NULL 2 +NULL 1 1 +NULL NULL 1 +NULL 5 1 +NULL 3 1 +NULL 2 1 +1 NULL 1 +2 NULL 1 +3 NULL 1 +4 NULL 1 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0) (type: tinyint), grouping(_col2, 1) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) 
+from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 0 0 0 +1 NULL 1 1 0 +1 NULL 3 1 1 +1 1 3 1 1 +2 NULL 1 1 0 +2 2 3 1 1 +3 NULL 1 1 0 +3 NULL 3 1 1 +3 3 3 1 1 +4 NULL 1 1 0 +4 5 3 1 1 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0) (type: tinyint), grouping(_col2, 1) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 0 0 0 +NULL NULL 2 0 1 +NULL 1 2 0 1 +NULL 2 2 0 1 +NULL 3 2 0 1 +NULL 5 2 0 1 +1 NULL 1 1 0 +1 NULL 3 1 1 +1 1 3 1 1 +2 NULL 1 1 0 +2 2 3 1 1 +3 NULL 1 1 0 +3 NULL 3 1 1 +3 3 3 1 1 +4 NULL 1 1 0 +4 5 3 1 1 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + 
Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (grouping(_col2, 0) = 1) (type: boolean) + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL +1 NULL +1 1 +2 NULL +2 2 +3 NULL +3 NULL +3 3 +4 NULL +4 5 +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((grouping(_col2, 0) = 1) or (grouping(_col2, 1) = 1)) (type: boolean) + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 
(type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 0) + grouping(_col2, 1)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int) + sort order: -+ + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +4 5 2 +3 3 2 +3 NULL 2 +2 2 2 +1 1 2 +1 NULL 2 +NULL 1 1 +NULL NULL 1 +NULL 5 1 +NULL 3 1 +NULL 2 1 +1 NULL 1 +2 NULL 1 +3 NULL 1 +4 NULL 1 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out index f4b0c91..e2d9d96 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out @@ -37,21 +37,21 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -118,21 +118,21 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE @@ -199,21 +199,21 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: a (type: string), b (type: string), '0' (type: string) + keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE @@ -279,19 +279,19 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE 
Group By Operator - keys: a (type: string), b (type: string), c (type: string), '0' (type: string) + keys: a (type: string), b (type: string), c (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 108 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) Statistics: Num rows: 3 Data size: 108 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE @@ -357,19 +357,19 @@ STAGE PLANS: outputColumnNames: a Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: a (type: string), '0' (type: string) + keys: a (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE @@ -427,21 +427,21 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col0 (type: double), '0' (type: string) + keys: _col0 (type: double), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string) + key expressions: _col0 (type: double), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string) + Map-reduce partition columns: _col0 (type: double), _col1 (type: int) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: double), KEY._col1 (type: string) + keys: KEY._col0 (type: double), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE diff --git 
ql/src/test/results/clientpositive/groupby_grouping_window.q.out ql/src/test/results/clientpositive/groupby_grouping_window.q.out index 5cd9737..251f4f7 100644 --- ql/src/test/results/clientpositive/groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out @@ -49,20 +49,20 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(live), max(comments) - keys: category (type: int), '0' (type: string) + keys: category (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: int) Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), max(VALUE._col1) - keys: KEY._col0 (type: int), KEY._col1 (type: string) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/groupby_rollup1.q.out ql/src/test/results/clientpositive/groupby_rollup1.q.out index 19dccc8..5437315 100644 --- ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -41,20 +41,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -120,19 +120,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -193,12 +193,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -206,7 +206,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -222,7 +222,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -230,7 +230,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -297,19 +297,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: 
KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -325,7 +325,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -333,7 +333,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: final outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -419,12 +419,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -435,7 +435,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -448,7 +448,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -464,7 +464,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -472,7 +472,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -508,7 +508,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), 
_col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -516,7 +516,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -532,7 +532,7 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -540,7 +540,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index 573469b..84021a3 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -48,20 +48,20 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE @@ -1486,20 +1486,20 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE 
Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -1670,20 +1670,20 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/limit_pushdown2.q.out ql/src/test/results/clientpositive/limit_pushdown2.q.out index 316d8e8..b44b529 100644 --- ql/src/test/results/clientpositive/limit_pushdown2.q.out +++ ql/src/test/results/clientpositive/limit_pushdown2.q.out @@ -709,20 +709,20 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(_col2) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct) Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) - 
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE @@ -798,20 +798,20 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(_col2) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct) Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index 127478e..2fb434c 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -159,14 +159,14 @@ STAGE PLANS: outputColumnNames: s_store_id Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: s_store_id (type: string), '0' (type: string) + keys: s_store_id (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs @@ -174,7 +174,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE @@ -244,14 +244,14 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), '0' (type: string) + keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 
(type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs @@ -259,12 +259,12 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -332,14 +332,14 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), '0' (type: string) + keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs @@ -347,12 +347,12 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/perf/query18.q.out ql/src/test/results/clientpositive/perf/query18.q.out index 1f4cfdb..cf11954 100644 --- ql/src/test/results/clientpositive/perf/query18.q.out +++ ql/src/test/results/clientpositive/perf/query18.q.out @@ -34,7 +34,7 @@ Stage-0 SHUFFLE [RS_43] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_42] (rows=2108229765 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["avg(_col4)","avg(_col5)","avg(_col6)","avg(_col7)","avg(_col8)","avg(_col9)","avg(_col10)"],keys:_col0, _col1, _col2, _col3, '0' + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["avg(_col4)","avg(_col5)","avg(_col6)","avg(_col7)","avg(_col8)","avg(_col9)","avg(_col10)"],keys:_col0, _col1, _col2, _col3, 0 Select Operator [SEL_40] (rows=421645953 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] Merge Join Operator [MERGEJOIN_83] (rows=421645953 
width=135) diff --git ql/src/test/results/clientpositive/perf/query22.q.out ql/src/test/results/clientpositive/perf/query22.q.out index 52fc566..6ff8896 100644 --- ql/src/test/results/clientpositive/perf/query22.q.out +++ ql/src/test/results/clientpositive/perf/query22.q.out @@ -1,6 +1,44 @@ -PREHOOK: query: explain select i_product_name ,i_brand ,i_class ,i_category ,avg(inv_quantity_on_hand) qoh from inventory ,date_dim ,item ,warehouse where inventory.inv_date_sk=date_dim.d_date_sk and inventory.inv_item_sk=item.i_item_sk and inventory.inv_warehouse_sk = warehouse.w_warehouse_sk and date_dim.d_month_seq between 1193 and 1193 + 11 group by i_product_name ,i_brand ,i_class ,i_category with rollup order by qoh, i_product_name, i_brand, i_class, i_category limit 100 +PREHOOK: query: explain +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select i_product_name ,i_brand ,i_class ,i_category ,avg(inv_quantity_on_hand) qoh from inventory ,date_dim ,item ,warehouse where inventory.inv_date_sk=date_dim.d_date_sk and inventory.inv_item_sk=item.i_item_sk and inventory.inv_warehouse_sk = warehouse.w_warehouse_sk and date_dim.d_month_seq between 1193 and 1193 + 11 group by i_product_name ,i_brand ,i_class ,i_category with rollup order by qoh, i_product_name, i_brand, i_class, i_category limit 100 +POSTHOOK: query: explain +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100 POSTHOOK: type: QUERY Plan optimized by CBO. 
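Editorial note, not part of the patch: the query22.q.out update above switches the test from the legacy "group by ... with rollup" spelling to the standard "group by rollup(...)" form, and the hunks that follow carry the same key-type change seen throughout this patch — the constant grouping-set id key becomes 0 (type: int) where it used to be '0' (type: string). A minimal sketch, assuming a hypothetical table t(a string, b string, x int) and that the two rollup spellings remain equivalent:

-- hypothetical table; both statements are expected to produce the same plan
EXPLAIN SELECT a, b, avg(x) FROM t GROUP BY a, b WITH ROLLUP;
EXPLAIN SELECT a, b, avg(x) FROM t GROUP BY ROLLUP(a, b);
-- with this patch the Group By Operator keys should read
--   keys: a (type: string), b (type: string), 0 (type: int)
-- rather than ending in '0' (type: string)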
@@ -31,7 +69,7 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_22] (rows=250121525 width=15) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["avg(_col3)"],keys:_col8, _col9, _col10, _col11, '0' + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["avg(_col3)"],keys:_col8, _col9, _col10, _col11, 0 Select Operator [SEL_21] (rows=50024305 width=15) Output:["_col8","_col9","_col10","_col11","_col3"] Merge Join Operator [MERGEJOIN_46] (rows=50024305 width=15) @@ -79,7 +117,7 @@ Stage-0 Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_41] (rows=8116 width=1119) - predicate:(d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] diff --git ql/src/test/results/clientpositive/perf/query27.q.out ql/src/test/results/clientpositive/perf/query27.q.out index 40ce084..cbd7d29 100644 --- ql/src/test/results/clientpositive/perf/query27.q.out +++ ql/src/test/results/clientpositive/perf/query27.q.out @@ -1,6 +1,46 @@ -PREHOOK: query: explain select i_item_id, s_state, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, store, item where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and customer_demographics.cd_gender = 'F' and customer_demographics.cd_marital_status = 'D' and customer_demographics.cd_education_status = 'Unknown' and date_dim.d_year = 1998 and store.s_state in ('KS','AL', 'MN', 'AL', 'SC', 'VT') group by i_item_id, s_state order by i_item_id ,s_state limit 100 +PREHOOK: query: explain +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select i_item_id, s_state, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, store, item where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and customer_demographics.cd_gender = 'F' and customer_demographics.cd_marital_status = 'D' and customer_demographics.cd_education_status = 'Unknown' and date_dim.d_year = 1998 and store.s_state in ('KS','AL', 'MN', 'AL', 'SC', 'VT') group by i_item_id, s_state order by i_item_id ,s_state limit 100 +POSTHOOK: query: explain +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, 
customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 POSTHOOK: type: QUERY Plan optimized by CBO. @@ -17,82 +57,84 @@ Stage-0 limit:100 Stage-1 Reducer 7 - File Output Operator [FS_35] - Limit [LIM_34] (rows=100 width=88) + File Output Operator [FS_36] + Limit [LIM_35] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_33] (rows=421657640 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Select Operator [SEL_34] (rows=1264972921 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_32] - Group By Operator [GBY_30] (rows=421657640 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)","avg(VALUE._col3)"],keys:KEY._col0, KEY._col1 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0, _col1 - Group By Operator [GBY_28] (rows=843315281 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["avg(_col4)","avg(_col5)","avg(_col7)","avg(_col6)"],keys:_col17, _col15 - Select Operator [SEL_27] (rows=843315281 width=88) - Output:["_col17","_col15","_col4","_col5","_col7","_col6"] - Merge Join Operator [MERGEJOIN_58] (rows=843315281 width=88) - Conds:RS_24._col1=RS_25._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col15","_col17"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=462000 width=1436) - Output:["_col0","_col1"] - Filter Operator [FIL_54] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_57] (rows=766650239 width=88) - Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col15"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=852 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_53] (rows=852 width=1910) - predicate:((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and s_store_sk is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_56] (rows=696954748 width=88) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_52] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_55] (rows=633595212 width=88) - Conds:RS_15._col2=RS_16._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 1 
[SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_50] (rows=575995635 width=88) - predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=232725 width=385) - Output:["_col0"] - Filter Operator [FIL_51] (rows=232725 width=385) - predicate:((cd_gender = 'F') and (cd_marital_status = 'D') and (cd_education_status = 'Unknown') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + SHUFFLE [RS_33] + Select Operator [SEL_32] (rows=1264972921 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_31] (rows=1264972921 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)","avg(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_29] (rows=2529945843 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["avg(_col2)","avg(_col3)","avg(_col4)","avg(_col5)"],keys:_col0, _col1, 0 + Select Operator [SEL_27] (rows=843315281 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_59] (rows=843315281 width=88) + Conds:RS_24._col1=RS_25._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col15","_col17"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=462000 width=1436) + Output:["_col0","_col1"] + Filter Operator [FIL_55] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_12] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_58] (rows=766650239 width=88) + Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col15"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=852 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_54] (rows=852 width=1910) + predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) + TableScan [TS_9] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_57] (rows=696954748 width=88) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_53] (rows=36524 width=1119) + predicate:((d_year = 2001) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_56] (rows=633595212 width=88) + Conds:RS_15._col2=RS_16._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_51] (rows=575995635 width=88) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=232725 width=385) + Output:["_col0"] + Filter Operator [FIL_52] (rows=232725 width=385) + predicate:((cd_gender = 'M') and (cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and cd_demo_sk is not null) + TableScan [TS_3] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] diff --git ql/src/test/results/clientpositive/perf/query36.q.out ql/src/test/results/clientpositive/perf/query36.q.out new file mode 100644 index 0000000..f5ec0f9 --- /dev/null +++ ql/src/test/results/clientpositive/perf/query36.q.out @@ -0,0 +1,149 @@ +PREHOOK: query: explain +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
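Editorial note, not part of the patch: query36 above drives both the rank() partition and the lochierarchy column off a sum of grouping() flags. Under the grouping() semantics shown in the T1 outputs earlier in this patch (1 when the column participates in the grouping set, 0 when it is rolled up), grouping(i_category) + grouping(i_class) evaluates to 2 on detail rows, 1 on per-category subtotals, and 0 on the grand total. A minimal sketch against a hypothetical table t(c1 string, c2 string, x int):

SELECT c1, c2,
       grouping(c1) + grouping(c2) AS lvl,  -- 2 = detail, 1 = subtotal, 0 = grand total (per the semantics above)
       sum(x) AS total
FROM t
GROUP BY ROLLUP(c1, c2);
-- internally each grouping(col) is evaluated against the grouping-set id key,
-- e.g. grouping(_col2, 0) and grouping(_col2, 1) in the plans above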
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 + File Output Operator [FS_35] + Limit [LIM_34] (rows=100 width=88) + Number of rows:100 + Select Operator [SEL_33] (rows=1149975358 width=88) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_32] + Select Operator [SEL_30] (rows=1149975358 width=88) + Output:["_col0","_col1","_col2","_col3","_col4"] + PTF Operator [PTF_29] (rows=1149975358 width=88) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(_col4 / _col5) ASC NULLS FIRST","partition by:":"(grouping(_col6, 0) + grouping(_col6, 1)), CASE WHEN ((UDFToInteger(grouping(_col6, 1)) = 0)) THEN (_col0) ELSE (null) END"}] + Select Operator [SEL_28] (rows=1149975358 width=88) + Output:["_col0","_col1","_col4","_col5","_col6"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:(grouping(_col6, 0) + grouping(_col6, 1)), CASE WHEN ((UDFToInteger(grouping(_col6, 1)) = 0)) THEN (_col0) ELSE (null) END + Select Operator [SEL_26] (rows=1149975358 width=88) + Output:["_col0","_col1","_col4","_col5","_col6"] + Group By Operator [GBY_25] (rows=1149975358 width=88) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_23] (rows=2299950717 width=88) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1, 0 + Select Operator [SEL_21] (rows=766650239 width=88) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_51] (rows=766650239 width=88) + Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col3","_col4","_col10","_col11"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_48] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_9] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_50] (rows=696954748 width=88) + Conds:RS_15._col2=RS_16._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_47] (rows=852 width=1910) + predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC', 'AL', 'GA') and s_store_sk is not null) + TableScan [TS_6] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_49] (rows=633595212 width=88) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_45] (rows=575995635 width=88) + predicate:(ss_sold_date_sk is not 
null and ss_item_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_46] (rows=36524 width=1119) + predicate:((d_year = 1999) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + diff --git ql/src/test/results/clientpositive/perf/query67.q.out ql/src/test/results/clientpositive/perf/query67.q.out index 41cd274..1f38027 100644 --- ql/src/test/results/clientpositive/perf/query67.q.out +++ ql/src/test/results/clientpositive/perf/query67.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain select * from (select i_category ,i_class @@ -23,11 +23,11 @@ from (select i_category ,date_dim ,store ,item - where store_sales.ss_sold_date_sk=date_dim.d_date_sk - and store_sales.ss_item_sk=item.i_item_sk - and store_sales.ss_store_sk = store.s_store_sk - and d_month_seq between 1193 and 1193+11 - group by i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id with rollup)dw1) dw2 + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 where rk <= 100 order by i_category ,i_class @@ -41,7 +41,7 @@ order by i_category ,rk limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain select * from (select i_category ,i_class @@ -66,11 +66,11 @@ from (select i_category ,date_dim ,store ,item - where store_sales.ss_sold_date_sk=date_dim.d_date_sk - and store_sales.ss_item_sk=item.i_item_sk - and store_sales.ss_store_sk = store.s_store_sk - and d_month_seq between 1193 and 1193+11 - group by i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id with rollup)dw1) dw2 + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 where rk <= 100 order by i_category ,i_class @@ -125,7 +125,7 @@ Stage-0 SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Group By Operator [GBY_23] (rows=6899852151 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col8)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, '0' + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col8)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 0 Select Operator [SEL_21] (rows=766650239 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] Merge Join Operator [MERGEJOIN_54] (rows=766650239 width=88) @@ -173,7 +173,7 @@ Stage-0 Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0","_col2","_col3","_col4"] Filter Operator [FIL_49] (rows=8116 width=1119) - predicate:(d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] diff --git ql/src/test/results/clientpositive/perf/query70.q.out ql/src/test/results/clientpositive/perf/query70.q.out index 611af74..581fd9b 100644 --- ql/src/test/results/clientpositive/perf/query70.q.out +++ ql/src/test/results/clientpositive/perf/query70.q.out @@ -1,6 +1,76 @@ -PREHOOK: query: explain select sum(ss_net_profit) as total_sum ,s_state ,s_county ,grouping__id as lochierarchy , rank() over(partition by grouping__id, case when grouping__id == 2 then s_state end order by sum(ss_net_profit)) as rank_within_parent from store_sales ss join date_dim d1 on d1.d_date_sk = ss.ss_sold_date_sk join store s on s.s_store_sk = ss.ss_store_sk where d1.d_month_seq between 1193 and 1193+11 and s.s_state in ( select s_state from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5 ) group by s_state,s_county with rollup order by lochierarchy desc ,case when lochierarchy = 0 then s_state end ,rank_within_parent limit 100 +PREHOOK: query: explain +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store s + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s.s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(ss_net_profit) as total_sum ,s_state ,s_county ,grouping__id as lochierarchy , rank() over(partition by grouping__id, case when grouping__id == 2 then s_state end order by sum(ss_net_profit)) as rank_within_parent from store_sales ss join date_dim d1 on d1.d_date_sk = ss.ss_sold_date_sk join store s on s.s_store_sk = ss.ss_store_sk where d1.d_month_seq between 1193 and 1193+11 and s.s_state in ( select s_state from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5 ) group by s_state,s_county with rollup order by lochierarchy desc ,case when lochierarchy = 0 then s_state end ,rank_within_parent limit 100 +POSTHOOK: query: explain +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then 
s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store s + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s.s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 POSTHOOK: type: QUERY Plan optimized by CBO. @@ -31,12 +101,12 @@ Stage-0 Select Operator [SEL_58] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] PTF Operator [PTF_57] (rows=1045432122 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST","partition by:":"_col5, CASE WHEN ((_col5 = 2)) THEN (_col0) ELSE (null) END"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 0) + grouping(_col5, 1)), CASE WHEN ((UDFToInteger(grouping(_col5, 1)) = 0)) THEN (_col0) ELSE (null) END"}] Select Operator [SEL_56] (rows=1045432122 width=88) Output:["_col0","_col1","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_55] - PartitionCols:_col5, CASE WHEN ((_col5 = 2)) THEN (_col0) ELSE (null) END + PartitionCols:(grouping(_col5, 0) + grouping(_col5, 1)), CASE WHEN ((UDFToInteger(grouping(_col5, 1)) = 0)) THEN (_col0) ELSE (null) END Select Operator [SEL_54] (rows=1045432122 width=88) Output:["_col0","_col1","_col4","_col5"] Group By Operator [GBY_53] (rows=1045432122 width=88) @@ -45,7 +115,7 @@ Stage-0 SHUFFLE [RS_52] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_51] (rows=2090864244 width=88) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, '0' + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0 Select Operator [SEL_49] (rows=696954748 width=88) Output:["_col0","_col1","_col2"] Merge Join Operator [MERGEJOIN_92] (rows=696954748 width=88) @@ -63,14 +133,14 @@ Stage-0 Filter Operator [FIL_81] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,ss,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col0 Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_82] (rows=8116 width=1119) - predicate:(d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] <-Reducer 9 [SIMPLE_EDGE] @@ -84,7 +154,7 @@ Stage-0 Select Operator [SEL_8] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] Filter Operator [FIL_83] (rows=1704 width=1910) - predicate:(s_store_sk is not null and s_state is not null) + predicate:(s_state is not null and s_store_sk is not null) TableScan [TS_6] (rows=1704 width=1910) 
default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county","s_state"] <-Reducer 14 [SIMPLE_EDGE] @@ -143,7 +213,7 @@ Stage-0 Select Operator [SEL_14] (rows=8116 width=1119) Output:["_col0"] Filter Operator [FIL_86] (rows=8116 width=1119) - predicate:(d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_12] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] diff --git ql/src/test/results/clientpositive/perf/query80.q.out ql/src/test/results/clientpositive/perf/query80.q.out index 6595e89..10ec36a 100644 --- ql/src/test/results/clientpositive/perf/query80.q.out +++ ql/src/test/results/clientpositive/perf/query80.q.out @@ -47,7 +47,7 @@ Stage-0 Reduce Output Operator [RS_122] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_121] (rows=2435062716 width=108) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, '0' + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0 Select Operator [SEL_77] (rows=231905279 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] Group By Operator [GBY_76] (rows=231905279 width=135) @@ -139,7 +139,7 @@ Stage-0 Reduce Output Operator [RS_122] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_121] (rows=2435062716 width=108) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, '0' + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0 Select Operator [SEL_118] (rows=115958879 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] Group By Operator [GBY_117] (rows=115958879 width=135) @@ -231,7 +231,7 @@ Stage-0 Reduce Output Operator [RS_122] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_121] (rows=2435062716 width=108) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, '0' + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0 Select Operator [SEL_38] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] Group By Operator [GBY_37] (rows=463823414 width=88) diff --git ql/src/test/results/clientpositive/perf/query86.q.out ql/src/test/results/clientpositive/perf/query86.q.out new file mode 100644 index 0000000..cd8ed13 --- /dev/null +++ ql/src/test/results/clientpositive/perf/query86.q.out @@ -0,0 +1,126 @@ +PREHOOK: query: explain +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + 
,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 + File Output Operator [FS_29] + Limit [LIM_28] (rows=100 width=135) + Number of rows:100 + Select Operator [SEL_27] (rows=261364852 width=135) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_26] + Select Operator [SEL_24] (rows=261364852 width=135) + Output:["_col0","_col1","_col2","_col3","_col4"] + PTF Operator [PTF_23] (rows=261364852 width=135) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 0) + grouping(_col5, 1)), CASE WHEN ((UDFToInteger(grouping(_col5, 1)) = 0)) THEN (_col0) ELSE (null) END"}] + Select Operator [SEL_22] (rows=261364852 width=135) + Output:["_col0","_col1","_col4","_col5"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:(grouping(_col5, 0) + grouping(_col5, 1)), CASE WHEN ((UDFToInteger(grouping(_col5, 1)) = 0)) THEN (_col0) ELSE (null) END + Select Operator [SEL_20] (rows=261364852 width=135) + Output:["_col0","_col1","_col4","_col5"] + Group By Operator [GBY_19] (rows=261364852 width=135) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_17] (rows=522729705 width=135) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0 + Select Operator [SEL_15] (rows=174243235 width=135) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_39] (rows=174243235 width=135) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_37] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_6] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_38] (rows=158402938 width=135) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=144002668 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_35] (rows=144002668 width=135) + predicate:(ws_sold_date_sk is not null and ws_item_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_net_paid"] + 
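In the compiled plan above, the user-level partition by grouping(i_category)+grouping(i_class) surfaces as the PTF partition expression (grouping(_col5, 0) + grouping(_col5, 1)): after aggregation, each grouping(col) call is rewritten against the int grouping-set ID key (the constant 0 key of the Group By Operator, carried here as _col5) plus the column's position in the rollup, so grouping(_col5, 1) reads the i_class bit. A stripped-down query that produces this same pattern, with query86's date_dim join and filters omitted:

    select i_category, i_class, sum(ws_net_paid),
           rank() over (
             partition by grouping(i_category) + grouping(i_class)
             order by sum(ws_net_paid) desc) as rank_within_parent
    from web_sales join item on i_item_sk = ws_item_sk
    group by rollup(i_category, i_class);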
<-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_36] (rows=8116 width=1119) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index 6cb9015..990b695 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -102,6 +102,7 @@ from_utc_timestamp get_json_object get_splits greatest +grouping hash hex histogram_numeric diff --git ql/src/test/results/clientpositive/spark/groupby_cube1.q.out ql/src/test/results/clientpositive/spark/groupby_cube1.q.out index 30aeb5d..9bad0f6 100644 --- ql/src/test/results/clientpositive/spark/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_cube1.q.out @@ -46,21 +46,21 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -111,21 +111,21 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: 
string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -202,26 +202,26 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -292,20 +292,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -371,12 +371,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 
(type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -385,12 +385,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -399,7 +399,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -477,12 +477,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -490,12 +490,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -504,7 +504,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: final outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -595,12 +595,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, 
_col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -616,12 +616,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -630,12 +630,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -644,7 +644,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -665,12 +665,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -679,7 +679,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE diff --git 
ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out index ca68ef3..ce003b8 100644 --- ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out @@ -46,21 +46,21 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -131,20 +131,20 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -210,12 +210,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 90 Basic 
stats: COMPLETE Column stats: NONE @@ -224,12 +224,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -238,7 +238,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -310,12 +310,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -323,12 +323,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -337,7 +337,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: final outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -428,12 +428,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -449,12 +449,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -463,12 +463,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -477,7 +477,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -498,12 +498,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -512,7 +512,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: final outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out index cf58d68..744c6d2 100644 --- ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out +++ 
ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out @@ -49,7 +49,7 @@ Stage-0 SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_3] (rows=27 width=5) - Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, '0' + Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, 0 Select Operator [SEL_1] (rows=9 width=5) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=9 width=5) @@ -129,7 +129,7 @@ Stage-0 SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_3] (rows=45 width=5) - Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, '0' + Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, 0 Select Operator [SEL_1] (rows=9 width=5) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=9 width=5) diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out index aa9ee87..598ab38 100644 --- ql/src/test/results/clientpositive/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out @@ -153,18 +153,18 @@ STAGE PLANS: outputColumnNames: s_store_id Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: s_store_id (type: string), '0' (type: string) + keys: s_store_id (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE @@ -228,23 +228,23 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), '0' (type: string) + keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -306,23 +306,23 @@ STAGE PLANS: 
outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), '0' (type: string) + keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE File Output Operator
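Across the golden files above the pattern is uniform: the synthetic grouping-set ID key that Hive appends for cube/rollup/grouping-sets queries is now the int constant 0 instead of the string literal '0', and every downstream key expression, partition column, and select expression that carries it changes from (type: string) to (type: int). A small repro of the plan shape these files assert, table hypothetical:

    create table t1 (key string, val string);
    explain
    select key, val, count(1)
    from t1
    group by key, val with cube;

    -- the hash-side Group By Operator now lists
    --   keys: key (type: string), val (type: string), 0 (type: int)
    -- where the previous golden output showed '0' (type: string)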