diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index d49563b1ca..88b250d8fc 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -639,6 +639,7 @@ minillaplocal.query.files=\
   mm_loaddata.q,\
   mm_loaddata_split_change.q,\
   mrr.q,\
+  multigroupbydistinct.q,\
   multiMapJoin1.q,\
   multiMapJoin2.q,\
   multi_in_clause.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
index 103d5e157e..e8b2c37089 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
@@ -16,21 +16,22 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
+import com.google.common.base.Preconditions;
 import java.math.BigDecimal;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Aggregate.Group;
 import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.metadata.RelColumnOrigin;
@@ -44,7 +45,6 @@
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.Pair;
-import org.apache.calcite.util.Util;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
@@ -58,7 +58,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.base.Function;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import com.google.common.math.IntMath;
@@ -112,7 +111,7 @@ public HiveExpandDistinctAggregatesRule(
   public void onMatch(RelOptRuleCall call) {
     final Aggregate aggregate = call.rel(0);
     int numCountDistinct = getNumCountDistinctCall(aggregate);
-    if (numCountDistinct == 0) {
+    if (numCountDistinct == 0 || aggregate.getGroupType() != Group.SIMPLE) {
       return;
     }
 
@@ -121,7 +120,8 @@ public void onMatch(RelOptRuleCall call) {
     int nonDistinctCount = 0;
     List<List<Integer>> argListList = new ArrayList<List<Integer>>();
     Set<List<Integer>> argListSets = new LinkedHashSet<List<Integer>>();
-    Set<Integer> positions = new HashSet<>();
+    ImmutableBitSet.Builder newGroupSet = ImmutableBitSet.builder();
+    newGroupSet.addAll(aggregate.getGroupSet());
     for (AggregateCall aggCall : aggregate.getAggCallList()) {
       if (!aggCall.isDistinct()) {
         ++nonDistinctCount;
@@ -130,33 +130,27 @@ public void onMatch(RelOptRuleCall call) {
       ArrayList<Integer> argList = new ArrayList<Integer>();
       for (Integer arg : aggCall.getArgList()) {
         argList.add(arg);
-        positions.add(arg);
+        newGroupSet.set(arg);
       }
       // Aggr checks for sorted argList.
       argListList.add(argList);
       argListSets.add(argList);
     }
-    Util.permAssert(argListSets.size() > 0, "containsDistinctCall lied");
+    Preconditions.checkArgument(argListSets.size() > 0, "containsDistinctCall lied");
 
-    if (numCountDistinct > 1 && numCountDistinct == aggregate.getAggCallList().size()
-        && aggregate.getGroupSet().isEmpty()) {
+    if (numCountDistinct > 1 && numCountDistinct == aggregate.getAggCallList().size()) {
       LOG.debug("Trigger countDistinct rewrite. numCountDistinct is " + numCountDistinct);
       // now positions contains all the distinct positions, i.e., $5, $4, $6
       // we need to first sort them as group by set
       // and then get their position later, i.e., $4->1, $5->2, $6->3
       cluster = aggregate.getCluster();
       rexBuilder = cluster.getRexBuilder();
-      RelNode converted = null;
-      List<Integer> sourceOfForCountDistinct = new ArrayList<>();
-      sourceOfForCountDistinct.addAll(positions);
-      Collections.sort(sourceOfForCountDistinct);
       try {
-        converted = convert(aggregate, argListList, sourceOfForCountDistinct);
+        call.transformTo(convert(aggregate, argListList, newGroupSet.build()));
       } catch (CalciteSemanticException e) {
         LOG.debug(e.toString());
         throw new RuntimeException(e);
       }
-      call.transformTo(converted);
       return;
     }
 
@@ -200,19 +194,23 @@ public void onMatch(RelOptRuleCall call) {
    * (department_id, gender, education_level))subq;
    * @throws CalciteSemanticException
    */
-  private RelNode convert(Aggregate aggregate, List<List<Integer>> argList, List<Integer> sourceOfForCountDistinct) throws CalciteSemanticException {
+  private RelNode convert(Aggregate aggregate, List<List<Integer>> argList, ImmutableBitSet newGroupSet)
+      throws CalciteSemanticException {
     // we use this map to map the position of argList to the position of grouping set
     Map<Integer, Integer> map = new HashMap<>();
     List<List<Integer>> cleanArgList = new ArrayList<>();
-    final Aggregate groupingSets = createGroupingSets(aggregate, argList, cleanArgList, map, sourceOfForCountDistinct);
-    return createCount(groupingSets, argList, cleanArgList, map, sourceOfForCountDistinct);
+    final Aggregate groupingSets = createGroupingSets(aggregate, argList, cleanArgList, map, newGroupSet);
+    return createCount(groupingSets, argList, cleanArgList, map, aggregate.getGroupSet(), newGroupSet);
   }
 
-  private int getGroupingIdValue(List<Integer> list, List<Integer> sourceOfForCountDistinct,
+  private int getGroupingIdValue(List<Integer> list, ImmutableBitSet originalGroupSet, ImmutableBitSet newGroupSet,
       int groupCount) {
     int ind = IntMath.pow(2, groupCount) - 1;
+    for (int pos : originalGroupSet) {
+      ind &= ~(1 << groupCount - newGroupSet.indexOf(pos) - 1);
+    }
     for (int i : list) {
-      ind &= ~(1 << groupCount - sourceOfForCountDistinct.indexOf(i) - 1);
+      ind &= ~(1 << groupCount - newGroupSet.indexOf(i) - 1);
     }
     return ind;
   }
@@ -222,28 +220,28 @@ private int getGroupingIdValue(List<Integer> list, List<Integer> sourceOfForCoun
    * @param argList: the original argList in aggregate
    * @param cleanArgList: the new argList without duplicates
    * @param map: the mapping from the original argList to the new argList
-   * @param sourceOfForCountDistinct: the sorted positions of groupset
+   * @param newGroupSet: the sorted positions of groupset
    * @return
    * @throws CalciteSemanticException
    */
   private RelNode createCount(Aggregate aggr, List<List<Integer>> argList, List<List<Integer>> cleanArgList, Map<Integer, Integer> map,
-      List<Integer> sourceOfForCountDistinct) throws CalciteSemanticException {
-    List<RexNode> originalInputRefs = Lists.transform(aggr.getRowType().getFieldList(),
-        new Function<RelDataTypeField, RexNode>() {
-          @Override
-          public RexNode apply(RelDataTypeField input) {
-            return new RexInputRef(input.getIndex(), input.getType());
-          }
-        });
+      ImmutableBitSet originalGroupSet, ImmutableBitSet newGroupSet) throws CalciteSemanticException {
+    final List<RexNode> originalInputRefs = aggr.getRowType().getFieldList()
+        .stream()
+        .map(input -> new RexInputRef(input.getIndex(), input.getType()))
+        .collect(Collectors.toList());
     final List<RexNode> gbChildProjLst = Lists.newArrayList();
     // for singular arg, count should not include null
     // e.g., count(case when i=1 and department_id is not null then 1 else null end) as c0,
     // for non-singular args, count can include null, i.e. (,) is counted as 1
     for (List<Integer> list : cleanArgList) {
-      RexNode condition = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, originalInputRefs
-          .get(originalInputRefs.size() - 1), rexBuilder.makeExactLiteral(new BigDecimal(
-          getGroupingIdValue(list, sourceOfForCountDistinct, aggr.getGroupCount()))));
+      RexNode condition = rexBuilder.makeCall(
+          SqlStdOperatorTable.EQUALS,
+          originalInputRefs.get(originalInputRefs.size() - 1),
+          rexBuilder.makeExactLiteral(
+              new BigDecimal(
+                  getGroupingIdValue(list, originalGroupSet, newGroupSet, aggr.getGroupCount()))));
       if (list.size() == 1) {
         int pos = list.get(0);
         RexNode notNull = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
@@ -257,6 +255,10 @@ public RexNode apply(RelDataTypeField input) {
       gbChildProjLst.add(when);
     }
 
+    for (int pos : originalGroupSet) {
+      gbChildProjLst.add(originalInputRefs.get(newGroupSet.indexOf(pos)));
+    }
+
     // create the project before GB
     RelNode gbInputRel = HiveProject.create(aggr, gbChildProjLst, null);
 
@@ -269,23 +271,25 @@ public RexNode apply(RelDataTypeField input) {
           TypeInfoFactory.longTypeInfo, i, aggFnRetType);
       aggregateCalls.add(aggregateCall);
     }
+    ImmutableBitSet groupSet =
+        ImmutableBitSet.range(cleanArgList.size(), cleanArgList.size() + originalGroupSet.cardinality());
     Aggregate aggregate = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel,
-        ImmutableBitSet.of(), null, aggregateCalls);
+        groupSet, null, aggregateCalls);
 
     // create the project after GB. For those repeated values, e.g., select
     // count(distinct x, y), count(distinct y, x), we find the correct mapping.
     if (map.isEmpty()) {
       return aggregate;
     } else {
-      List<RexNode> originalAggrRefs = Lists.transform(aggregate.getRowType().getFieldList(),
-          new Function<RelDataTypeField, RexNode>() {
-            @Override
-            public RexNode apply(RelDataTypeField input) {
-              return new RexInputRef(input.getIndex(), input.getType());
-            }
-          });
+      final List<RexNode> originalAggrRefs = aggregate.getRowType().getFieldList()
+          .stream()
+          .map(input -> new RexInputRef(input.getIndex(), input.getType()))
+          .collect(Collectors.toList());
       final List<RexNode> projLst = Lists.newArrayList();
       int index = 0;
+      for (int i = 0; i < groupSet.cardinality(); i++) {
+        projLst.add(originalAggrRefs.get(index++));
+      }
       for (int i = 0; i < argList.size(); i++) {
         if (map.containsKey(i)) {
           projLst.add(originalAggrRefs.get(map.get(i)));
@@ -302,18 +306,18 @@ public RexNode apply(RelDataTypeField input) {
    * @param argList: the original argList in aggregate
    * @param cleanArgList: the new argList without duplicates
    * @param map: the mapping from the original argList to the new argList
-   * @param sourceOfForCountDistinct: the sorted positions of groupset
+   * @param groupSet: new group set
    * @return
    */
   private Aggregate createGroupingSets(Aggregate aggregate, List<List<Integer>> argList, List<List<Integer>> cleanArgList, Map<Integer, Integer> map,
-      List<Integer> sourceOfForCountDistinct) {
-    final ImmutableBitSet groupSet = ImmutableBitSet.of(sourceOfForCountDistinct);
+      ImmutableBitSet groupSet) {
     final List<ImmutableBitSet> origGroupSets = new ArrayList<>();
 
     for (int i = 0; i < argList.size(); i++) {
       List<Integer> list = argList.get(i);
-      ImmutableBitSet bitSet = ImmutableBitSet.of(list);
+      ImmutableBitSet bitSet = aggregate.getGroupSet().union(
+          ImmutableBitSet.of(list));
       int prev = origGroupSets.indexOf(bitSet);
       if (prev == -1) {
         origGroupSets.add(bitSet);
@@ -323,7 +327,7 @@ private Aggregate createGroupingSets(Aggregate aggregate, List<List<Integer>> ar
       }
     }
     // Calcite expects the grouping sets sorted and without duplicates
-    Collections.sort(origGroupSets, ImmutableBitSet.COMPARATOR);
+    origGroupSets.sort(ImmutableBitSet.COMPARATOR);
 
     List<AggregateCall> aggregateCalls = new ArrayList<AggregateCall>();
     // Create GroupingID column
diff --git a/ql/src/test/queries/clientpositive/multigroupbydistinct.q b/ql/src/test/queries/clientpositive/multigroupbydistinct.q
new file mode 100644
index 0000000000..dbd81bae23
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/multigroupbydistinct.q
@@ -0,0 +1,60 @@
+create table tabw4intcols (x integer, y integer, z integer, a integer);
+insert into tabw4intcols values (1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3), (4, 4, 4, 4),
+  (1, 2, 1, 2), (2, 3, 2, 3), (3, 4, 3, 4), (4, 1, 4, 1),
+  (1, 2, 3, 4), (4, 3, 2, 1), (1, 2, 3, 4), (4, 3, 2, 1);
+
+explain cbo
+select z, count(distinct y), count(distinct a)
+from tabw4intcols
+group by z;
+
+select z, count(distinct y), count(distinct a)
+from tabw4intcols
+group by z;
+
+explain cbo
+select z, x, count(distinct y), count(distinct a)
+from tabw4intcols
+group by z, x;
+
+select z, x, count(distinct y), count(distinct a)
+from tabw4intcols
+group by z, x;
+
+explain cbo
+select x, z, count(distinct y), count(distinct a)
+from tabw4intcols
+group by z, x;
+
+select x, z, count(distinct y), count(distinct a)
+from tabw4intcols
+group by z, x;
+
+explain cbo
+select x, a, y, count(distinct z)
+from tabw4intcols
+group by a, x, y;
+
+select x, a, y, count(distinct z)
+from tabw4intcols
+group by a, x, y;
+
+explain cbo
+select x, count(distinct y), z, count(distinct a)
+from tabw4intcols
+group by z, x;
+
+select x, count(distinct y), z, count(distinct a)
+from tabw4intcols
+group by z, x;
+
+explain cbo
+select count(distinct y), x, z,
count(distinct a) +from tabw4intcols +group by z, x; + +select count(distinct y), x, z, count(distinct a) +from tabw4intcols +group by z, x; + +drop table tabw4intcols; diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index 041bb28f3a..23038f0b0d 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ -734,42 +734,49 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 1779850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: ctinyint, cstring1, cstring2 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 1779850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2) - keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) + keys: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string), 0L (type: bigint) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 12288 Data size: 1976458 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string) - sort order: +++ + key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + sort order: ++++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 1976458 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - Execution mode: llap + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: tinyint) + keys: KEY._col0 (type: tinyint), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN (((_col3 = 1L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 2L) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + 
aggregations: count(_col0), count(_col1) + keys: _col2 (type: tinyint) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out index 2d2b6eb173..ebf6567ff6 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out @@ -804,38 +804,45 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 1779850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: ctinyint, cstring1, cstring2 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 1779850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2) - keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) + keys: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string), 0L (type: bigint) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 12288 Data size: 1976458 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string) - sort order: +++ + key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + sort order: ++++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 1976458 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - Execution mode: llap + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: tinyint) + keys: KEY._col0 (type: tinyint), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN 
(((_col3 = 1L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 2L) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col2 (type: tinyint) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/multigroupbydistinct.q.out b/ql/src/test/results/clientpositive/llap/multigroupbydistinct.q.out new file mode 100644 index 0000000000..6710ab2627 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/multigroupbydistinct.q.out @@ -0,0 +1,271 @@ +PREHOOK: query: create table tabw4intcols (x integer, y integer, z integer, a integer) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tabw4intcols +POSTHOOK: query: create table tabw4intcols (x integer, y integer, z integer, a integer) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tabw4intcols +PREHOOK: query: insert into tabw4intcols values (1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3), (4, 4, 4, 4), + (1, 2, 1, 2), (2, 3, 2, 3), (3, 4, 3, 4), (4, 1, 4, 1), + (1, 2, 3, 4), (4, 3, 2, 1), (1, 2, 3, 4), (4, 3, 2, 1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tabw4intcols +POSTHOOK: query: insert into tabw4intcols values (1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3), (4, 4, 4, 4), + (1, 2, 1, 2), (2, 3, 2, 3), (3, 4, 3, 4), (4, 1, 4, 1), + (1, 2, 3, 4), (4, 3, 2, 1), (1, 2, 3, 4), (4, 3, 2, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tabw4intcols +POSTHOOK: Lineage: tabw4intcols.a SCRIPT [] +POSTHOOK: Lineage: tabw4intcols.x SCRIPT [] +POSTHOOK: Lineage: tabw4intcols.y SCRIPT [] +POSTHOOK: Lineage: tabw4intcols.z SCRIPT [] +PREHOOK: query: explain cbo +select z, count(distinct y), count(distinct a) +from tabw4intcols +group by z +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select z, count(distinct y), count(distinct a) +from tabw4intcols +group by z +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{2}], agg#0=[count($0)], agg#1=[count($1)]) + HiveProject($f0=[CASE(AND(=($3, 1), IS NOT NULL($1)), 1, null:INTEGER)], $f1=[CASE(AND(=($3, 2), IS NOT NULL($2)), 1, null:INTEGER)], $f2=[$0]) + HiveAggregate(group=[{0, 1, 2}], groups=[[{0, 1}, {0, 2}]], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$2], $f1=[$1], $f2=[$3]) + HiveTableScan(table=[[default, tabw4intcols]], table:alias=[tabw4intcols]) + +PREHOOK: query: select z, count(distinct y), count(distinct a) +from tabw4intcols +group by z +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: select z, count(distinct y), count(distinct a) +from tabw4intcols +group by 
z +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +1 2 2 +2 2 3 +3 3 2 +4 2 2 +PREHOOK: query: explain cbo +select z, x, count(distinct y), count(distinct a) +from tabw4intcols +group by z, x +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select z, x, count(distinct y), count(distinct a) +from tabw4intcols +group by z, x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{2, 3}], agg#0=[count($0)], agg#1=[count($1)]) + HiveProject($f0=[CASE(AND(=($4, 1), IS NOT NULL($2)), 1, null:INTEGER)], $f1=[CASE(AND(=($4, 2), IS NOT NULL($3)), 1, null:INTEGER)], $f2=[$0], $f3=[$1]) + HiveAggregate(group=[{0, 1, 2, 3}], groups=[[{0, 1, 2}, {0, 1, 3}]], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$2], $f1=[$0], $f2=[$1], $f3=[$3]) + HiveTableScan(table=[[default, tabw4intcols]], table:alias=[tabw4intcols]) + +PREHOOK: query: select z, x, count(distinct y), count(distinct a) +from tabw4intcols +group by z, x +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: select z, x, count(distinct y), count(distinct a) +from tabw4intcols +group by z, x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +1 1 2 2 +2 2 2 2 +2 4 1 1 +3 1 1 1 +3 3 2 2 +4 4 2 2 +PREHOOK: query: explain cbo +select x, z, count(distinct y), count(distinct a) +from tabw4intcols +group by z, x +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select x, z, count(distinct y), count(distinct a) +from tabw4intcols +group by z, x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +CBO PLAN: +HiveProject(x=[$1], z=[$0], _o__c2=[$2], _o__c3=[$3]) + HiveAggregate(group=[{2, 3}], agg#0=[count($0)], agg#1=[count($1)]) + HiveProject($f0=[CASE(AND(=($4, 1), IS NOT NULL($2)), 1, null:INTEGER)], $f1=[CASE(AND(=($4, 2), IS NOT NULL($3)), 1, null:INTEGER)], $f2=[$0], $f3=[$1]) + HiveAggregate(group=[{0, 1, 2, 3}], groups=[[{0, 1, 2}, {0, 1, 3}]], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$2], $f1=[$0], $f2=[$1], $f3=[$3]) + HiveTableScan(table=[[default, tabw4intcols]], table:alias=[tabw4intcols]) + +PREHOOK: query: select x, z, count(distinct y), count(distinct a) +from tabw4intcols +group by z, x +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: select x, z, count(distinct y), count(distinct a) +from tabw4intcols +group by z, x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +1 1 2 2 +2 2 2 2 +4 2 1 1 +1 3 1 1 +3 3 2 2 +4 4 2 2 +PREHOOK: query: explain cbo +select x, a, y, count(distinct z) +from tabw4intcols +group by a, x, y +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select x, a, y, count(distinct z) +from tabw4intcols +group by a, x, y +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +CBO PLAN: +HiveProject(x=[$0], a=[$2], y=[$1], _o__c3=[$3]) + HiveAggregate(group=[{0, 1, 3}], agg#0=[count($2)]) + HiveProject(x=[$0], y=[$1], z=[$2], a=[$3]) + HiveAggregate(group=[{0, 1, 2, 3}]) + HiveTableScan(table=[[default, tabw4intcols]], 
table:alias=[tabw4intcols]) + +PREHOOK: query: select x, a, y, count(distinct z) +from tabw4intcols +group by a, x, y +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: select x, a, y, count(distinct z) +from tabw4intcols +group by a, x, y +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +1 1 1 1 +1 2 2 1 +1 4 2 1 +2 2 2 1 +2 3 3 1 +3 3 3 1 +3 4 4 1 +4 1 1 1 +4 1 3 1 +4 4 4 1 +PREHOOK: query: explain cbo +select x, count(distinct y), z, count(distinct a) +from tabw4intcols +group by z, x +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select x, count(distinct y), z, count(distinct a) +from tabw4intcols +group by z, x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +CBO PLAN: +HiveProject(x=[$1], _o__c1=[$2], z=[$0], _o__c3=[$3]) + HiveAggregate(group=[{2, 3}], agg#0=[count($0)], agg#1=[count($1)]) + HiveProject($f0=[CASE(AND(=($4, 1), IS NOT NULL($2)), 1, null:INTEGER)], $f1=[CASE(AND(=($4, 2), IS NOT NULL($3)), 1, null:INTEGER)], $f2=[$0], $f3=[$1]) + HiveAggregate(group=[{0, 1, 2, 3}], groups=[[{0, 1, 2}, {0, 1, 3}]], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$2], $f1=[$0], $f2=[$1], $f3=[$3]) + HiveTableScan(table=[[default, tabw4intcols]], table:alias=[tabw4intcols]) + +PREHOOK: query: select x, count(distinct y), z, count(distinct a) +from tabw4intcols +group by z, x +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: select x, count(distinct y), z, count(distinct a) +from tabw4intcols +group by z, x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +1 2 1 2 +2 2 2 2 +4 1 2 1 +1 1 3 1 +3 2 3 2 +4 2 4 2 +PREHOOK: query: explain cbo +select count(distinct y), x, z, count(distinct a) +from tabw4intcols +group by z, x +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select count(distinct y), x, z, count(distinct a) +from tabw4intcols +group by z, x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +CBO PLAN: +HiveProject(_o__c0=[$2], x=[$1], z=[$0], _o__c3=[$3]) + HiveAggregate(group=[{2, 3}], agg#0=[count($0)], agg#1=[count($1)]) + HiveProject($f0=[CASE(AND(=($4, 1), IS NOT NULL($2)), 1, null:INTEGER)], $f1=[CASE(AND(=($4, 2), IS NOT NULL($3)), 1, null:INTEGER)], $f2=[$0], $f3=[$1]) + HiveAggregate(group=[{0, 1, 2, 3}], groups=[[{0, 1, 2}, {0, 1, 3}]], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$2], $f1=[$0], $f2=[$1], $f3=[$3]) + HiveTableScan(table=[[default, tabw4intcols]], table:alias=[tabw4intcols]) + +PREHOOK: query: select count(distinct y), x, z, count(distinct a) +from tabw4intcols +group by z, x +PREHOOK: type: QUERY +PREHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct y), x, z, count(distinct a) +from tabw4intcols +group by z, x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tabw4intcols +#### A masked pattern was here #### +2 1 1 2 +2 2 2 2 +1 4 2 1 +1 1 3 1 +2 3 3 2 +2 4 4 2 +PREHOOK: query: drop table tabw4intcols +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tabw4intcols +PREHOOK: Output: default@tabw4intcols +POSTHOOK: query: drop table tabw4intcols +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tabw4intcols 
+POSTHOOK: Output: default@tabw4intcols diff --git a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index 1e0aa93123..34c572ed37 100644 --- a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -741,43 +741,50 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 1779850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: ctinyint, cstring1, cstring2 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 1779850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2) - keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) + keys: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string), 0L (type: bigint) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 12288 Data size: 1976458 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string) - sort order: +++ + key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + sort order: ++++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 1976458 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: tinyint) + keys: KEY._col0 (type: tinyint), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Offset of rows: 10 - Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN (((_col3 = 1L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 2L) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24576 Data size: 3756114 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col2 (type: tinyint) + mode: complete + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Offset of rows: 10 + Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out index 2bacc42f87..bd96597295 100644 --- a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out @@ -48,38 +48,46 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) - outputColumnNames: id, key, name + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT key), count(DISTINCT name) - keys: id (type: int), key (type: int), name (type: int) - minReductionHashAggr: 0.6 + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), 0L (type: bigint) + minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - sort order: +++ + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN (((_col3 = 1L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 2L) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: 
int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col2 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -148,7 +156,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) @@ -156,33 +164,41 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT _col1), count(DISTINCT _col2) - keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) - minReductionHashAggr: 0.5 + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), 0L (type: bigint) + minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - sort order: +++ + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN (((_col3 = 1L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 2L) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col2 (type: int) + 
mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -238,7 +254,7 @@ STAGE PLANS: outputColumnNames: id, key, name Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: id (type: int), key (type: int), name (type: int) + keys: id (type: int), name (type: int), key (type: int) minReductionHashAggr: 0.6 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -251,7 +267,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) @@ -259,33 +275,41 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT _col2), count(DISTINCT _col1) - keys: _col0 (type: int), _col2 (type: int), _col1 (type: int) - minReductionHashAggr: 0.5 + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), 0L (type: bigint) + minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - sort order: +++ + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN (((_col3 = 1L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 2L) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 
Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col2 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -329,6 +353,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -354,7 +379,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) @@ -362,22 +387,46 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT _col1), count(DISTINCT _col2) - keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) - minReductionHashAggr: 0.5 + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), 0L (type: bigint) + minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: bigint) + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN (((_col3 = 1L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 2L) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col2 (type: int) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition 
columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -432,6 +481,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -444,7 +494,7 @@ STAGE PLANS: outputColumnNames: id, key, name Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: id (type: int), key (type: int), name (type: int) + keys: id (type: int), name (type: int), key (type: int) minReductionHashAggr: 0.6 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -457,7 +507,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) @@ -465,22 +515,46 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT _col2), count(DISTINCT _col1) - keys: _col0 (type: int), _col2 (type: int), _col1 (type: int) - minReductionHashAggr: 0.5 + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), 0L (type: bigint) + minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: bigint) + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN (((_col3 = 1L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 2L) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col2 (type: int) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: 
int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + aggregations: count(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 diff --git a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out index 042e4fe501..df7f9bceee 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out @@ -76,20 +76,20 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT value), count(DISTINCT key) - keys: key (type: string), value (type: string) + keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 382 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + sort order: +++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -145,19 +145,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN (((_col2 = 0L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col2 = 1L) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: count(_col0), count(_col1) + keys: _col2 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out index 434714422c..fe8d3f1511 100644 --- a/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out @@ -75,20 +75,20 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT value), count(DISTINCT key) - keys: key (type: string), value (type: string) + keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 382 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + sort order: +++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -122,19 +122,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN (((_col2 = 0L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col2 = 1L) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0), count(_col1) 
+ keys: _col2 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index 693198e914..8336176e8a 100644 --- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -704,39 +704,48 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: ctinyint, cstring1, cstring2 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2) - keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) + keys: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string), 0L (type: bigint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24576 Data size: 5815988 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string) - sort order: +++ + key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + sort order: ++++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 + Statistics: Num rows: 24576 Data size: 5815988 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: tinyint) + keys: KEY._col0 (type: tinyint), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 4720 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 4720 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN (((_col3 = 1L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 2L) and _col2 is not null)) THEN (1) ELSE 
(null) END (type: int), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col2 (type: tinyint) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4720 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 4720 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator
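
A minimal standalone sketch of the GROUPING__ID arithmetic that the patched getGroupingIdValue performs, assuming the column layout of the multigroupbydistinct.q plans above (GROUP BY z, x with count(distinct y) and count(distinct a), widened group set {z, x, y, a}); the class and method names here are illustrative only and are not part of the patch.

import java.util.List;

// Illustrative sketch: re-derives the constants compared against _col3
// (GROUPING__ID) in the rewritten plans, i.e. CASE WHEN (_col3 = 1L) and
// CASE WHEN (_col3 = 2L). It mirrors the bitmask logic of getGroupingIdValue.
public class GroupingIdSketch {

  // ind starts with all bits set; a bit is cleared for every column present in
  // the grouping set, so the bits that remain set mark the columns that are
  // aggregated away, which is how GROUPING__ID identifies each grouping set.
  static int groupingIdValue(List<Integer> distinctArgs, List<Integer> originalGroupCols,
      List<Integer> newGroupCols) {
    int groupCount = newGroupCols.size();
    int ind = (1 << groupCount) - 1;
    for (int pos : originalGroupCols) {
      ind &= ~(1 << (groupCount - newGroupCols.indexOf(pos) - 1));
    }
    for (int pos : distinctArgs) {
      ind &= ~(1 << (groupCount - newGroupCols.indexOf(pos) - 1));
    }
    return ind;
  }

  public static void main(String[] args) {
    // GROUP BY z, x with count(distinct y), count(distinct a): the widened
    // group set is {z, x, y, a} at positions 0..3 of the pre-aggregate project.
    List<Integer> originalGroupCols = List.of(0, 1);   // z, x
    List<Integer> newGroupCols = List.of(0, 1, 2, 3);  // z, x, y, a

    // Grouping set {z, x, y} -> 1: the rows counted by count(distinct y).
    System.out.println(groupingIdValue(List.of(2), originalGroupCols, newGroupCols));
    // Grouping set {z, x, a} -> 2: the rows counted by count(distinct a).
    System.out.println(groupingIdValue(List.of(3), originalGroupCols, newGroupCols));
  }
}

This is also why createGroupingSets unions aggregate.getGroupSet() into every grouping set and why the second aggregate groups on ImmutableBitSet.range(cleanArgList.size(), ...): the original GROUP BY columns appear in every grouping set, so the outer aggregate can group on them while each CASE/count pair selects exactly one distinct column's grouping set.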