From fd259c43d7c6b232e1f93c27d640510c710ebbc5 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Wed, 20 May 2015 17:36:42 -0700 Subject: [PATCH] HIVE-10774 : ReduceSinkDedup undoes Distinct Rewrite --- ql/src/java/org/apache/hadoop/hive/ql/Context.java | 19 ++++--- .../ql/optimizer/calcite/HiveConfigContext.java | 11 +++- .../optimizer/calcite/cost/HiveAlgorithmsConf.java | 7 +-- .../optimizer/calcite/cost/HiveAlgorithmsUtil.java | 9 ++- .../rules/HiveExpandDistinctAggregatesRule.java | 3 + .../correlation/ReduceSinkDeDuplication.java | 6 ++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 19 ++++--- .../clientpositive/tez/limit_pushdown.q.out | 40 ++++++++++---- ql/src/test/results/clientpositive/tez/mrr.q.out | 64 ++++++++++++++++------ .../clientpositive/tez/vectorization_limit.q.out | 41 ++++++++++---- 10 files changed, 150 insertions(+), 69 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index a74bbbe..89ce24b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -87,12 +87,15 @@ protected String cboInfo; protected boolean cboSucceeded; protected boolean explainLogical = false; + //boolean used by CalcitePlanner to communicate to Hive optimizer about distinct Rewrite rule. + private boolean distinctRewrite = false; + protected String cmd = ""; // number of previous attempts protected int tryCount = 0; private TokenRewriteStream tokenRewriteStream; - private String executionId; + private final String executionId; // List of Locks for this query protected List hiveLocks; @@ -551,13 +554,6 @@ public void resetStream() { } /** - * Little abbreviation for StringUtils. - */ - private static boolean strEquals(String str1, String str2) { - return org.apache.commons.lang.StringUtils.equals(str1, str2); - } - - /** * Set the token rewrite stream being used to parse the current top-level SQL * statement. Note that this should not be used for other parsing * activities; for example, when we encounter a reference to a view, we switch @@ -714,4 +710,11 @@ public void setCboSucceeded(boolean cboSucceeded) { this.cboSucceeded = cboSucceeded; } + public boolean isDistinctRewrite() { + return distinctRewrite; + } + + public void setDistinctRewrite(boolean distinctRewrite) { + this.distinctRewrite = distinctRewrite; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java index 0e559e0..3256a43 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java @@ -22,16 +22,21 @@ public class HiveConfigContext implements Context { - private HiveAlgorithmsConf config; + private final HiveAlgorithmsConf config; + private final org.apache.hadoop.hive.ql.Context ctx; - public HiveConfigContext(HiveAlgorithmsConf config) { + public HiveConfigContext(HiveAlgorithmsConf config, org.apache.hadoop.hive.ql.Context ctx) { this.config = config; + this.ctx = ctx; } + @Override public T unwrap(Class clazz) { if (clazz.isInstance(config)) { return clazz.cast(config); - } + } else if (clazz.isInstance(ctx)) { + return clazz.cast(ctx); + } else return null; } } \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java index 83454ea..e5479c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java @@ -18,9 +18,9 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.cost; public class HiveAlgorithmsConf { - - private Double maxSplitSize; - private Double maxMemory; + + private final Double maxSplitSize; + private final Double maxMemory; public HiveAlgorithmsConf(Double maxSplitSize, Double maxMemory) { @@ -35,5 +35,4 @@ public Double getMaxSplitSize() { public Double getMaxMemory() { return maxMemory; } - } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java index 6840418..c4b90b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java @@ -34,7 +34,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin.MapJoinStreamingRelation; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import com.google.common.collect.ImmutableList; @@ -225,12 +224,12 @@ public static boolean isFittingIntoMemory(Double maxSize, RelNode input, int buc for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) { final RelFieldCollation leftFieldCollation = new RelFieldCollation(leftPos); collationListBuilder.add(leftFieldCollation); - leftCollationListBuilder.add(leftFieldCollation); + leftCollationListBuilder.add(leftFieldCollation); } for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) { final RelFieldCollation rightFieldCollation = new RelFieldCollation(rightPos); collationListBuilder.add(rightFieldCollation); - rightCollationListBuilder.add(rightFieldCollation); + rightCollationListBuilder.add(rightFieldCollation); } } @@ -285,10 +284,10 @@ public static RelDistribution getJoinDistribution(JoinPredicateInfo joinPredInfo JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. getEquiJoinPredicateElements().get(i); for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) { - leftKeysListBuilder.add(leftPos); + leftKeysListBuilder.add(leftPos); } for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) { - rightKeysListBuilder.add(rightPos); + rightKeysListBuilder.add(rightPos); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java index 73c7cac..d93abd6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java @@ -30,6 +30,7 @@ import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import org.apache.calcite.util.Util; +import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; @@ -128,6 +129,8 @@ public void onMatch(RelOptRuleCall call) { aggregate, argListSets.iterator().next()); call.transformTo(converted); + aggregate.getCluster().getPlanner().getContext(). + unwrap(Context.class).setDistinctRewrite(true); return; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index 404b759..3b4dc51 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -500,6 +500,12 @@ public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedup public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException { + + if (dedupCtx.getPctx().getContext().isDistinctRewrite()) { + // operator tree is purposefully expanded, lets not fold it back. + // See: HiveExpandDistinctAggregatesRule of Calcite. + return false; + } Operator start = CorrelationUtilities.getStartForGroupBy(cRS); GroupByOperator pGBY = CorrelationUtilities.findPossibleParent( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 4760a22..209d20e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -108,6 +108,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -412,9 +413,9 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST); boolean isStrictTest = isInTest && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"); - boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest || distinctExprsExists(qb); + boolean hasEnoughJoinsOrDistincts = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest || distinctExprsExists(qb); - if (!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy() + if (!isStrictTest && hasEnoughJoinsOrDistincts && !queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript() && !queryProperties.hasMultiDestQuery() && !queryProperties.hasLateralViews()) { @@ -427,8 +428,8 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, if (verbose) { if (isStrictTest) msg += "is in test running in mode other than nonstrict; "; - if (!hasEnoughJoins) - msg += "has too few joins; "; + if (!hasEnoughJoinsOrDistincts) + msg += "has too few joins or distincts; "; if (queryProperties.hasClusterBy()) msg += "has cluster by; "; if (queryProperties.hasDistributeBy()) @@ -602,7 +603,7 @@ private static void replaceASTChild(ASTNode child, ASTNode newChild) { ASTNode getOptimizedAST() throws SemanticException { ASTNode optiqOptimizedAST = null; RelNode optimizedOptiqPlan = null; - CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions); + CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions, ctx); try { optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks @@ -624,7 +625,7 @@ ASTNode getOptimizedAST() throws SemanticException { */ Operator getOptimizedHiveOPDag() throws SemanticException { RelNode optimizedOptiqPlan = null; - CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions); + CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions, ctx); try { optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks @@ -770,14 +771,16 @@ private RowResolver genRowResolver(Operator op, QB qb) { private RelOptCluster cluster; private RelOptSchema relOptSchema; private final Map partitionCache; + private final Context ctx; // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or // just last one. LinkedHashMap relToHiveRR = new LinkedHashMap(); LinkedHashMap> relToHiveColNameCalcitePosMap = new LinkedHashMap>(); - CalcitePlannerAction(Map partitionCache) { + CalcitePlannerAction(Map partitionCache, Context ctx) { this.partitionCache = partitionCache; + this.ctx = ctx; } @Override @@ -794,7 +797,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu final Double maxMemory = (double) HiveConf.getLongVar( conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory); - HiveConfigContext confContext = new HiveConfigContext(algorithmsConf); + HiveConfigContext confContext = new HiveConfigContext(algorithmsConf, ctx); RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext); final RelOptQuery query = new RelOptQuery(planner); final RexBuilder rexBuilder = cluster.getRexBuilder(); diff --git a/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out b/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out index 2a41aae..64dde1c 100644 --- a/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out @@ -464,6 +464,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -483,7 +484,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ - Map-reduce partition columns: _col0 (type: tinyint) + Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: double) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -495,19 +496,34 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) keys: _col0 (type: tinyint) - mode: complete + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/mrr.q.out b/ql/src/test/results/clientpositive/tez/mrr.q.out index d90b27f..a201e29 100644 --- a/ql/src/test/results/clientpositive/tez/mrr.q.out +++ b/ql/src/test/results/clientpositive/tez/mrr.q.out @@ -401,9 +401,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -424,7 +425,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s1 @@ -459,7 +460,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: @@ -471,16 +472,30 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) keys: _col0 (type: string) - mode: complete + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: bigint) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 4 Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 5 + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 @@ -830,9 +845,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -855,7 +871,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Group By Operator @@ -866,9 +882,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 5 Map Operator Tree: TableScan alias: s1 @@ -895,16 +911,30 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) keys: _col0 (type: string) - mode: complete + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: bigint) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out index 1c5b51f..a0486d8 100644 --- a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out @@ -333,6 +333,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -352,7 +353,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ - Map-reduce partition columns: _col0 (type: tinyint) + Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: double) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 @@ -365,19 +366,35 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) keys: _col0 (type: tinyint) - mode: complete + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator -- 1.7.12.4 (Apple Git-37)