diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 9c9d4e7..18b7162 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -93,7 +93,7 @@
   INVALID_MAPINDEX_CONSTANT(10031, "Non-constant expression for map indexes not supported"),
   INVALID_MAPINDEX_TYPE(10032, "MAP key type does not match index expression type"),
   NON_COLLECTION_TYPE(10033, "[] not valid on non-collection types"),
-  SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"),
+  @Deprecated SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"),
   COLUMN_REPEATED_IN_PARTITIONING_COLS(10035, "Column repeated in partitioning columns"),
   DUPLICATE_COLUMN_NAMES(10036, "Duplicate column name:"),
   INVALID_BUCKET_NUMBER(10037, "Bucket number should be bigger than zero"),
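
Note: the 10034 constant is deprecated rather than removed, since error codes are part of Hive's user-visible surface; the check that raised it (see the SemanticAnalyzer hunk further down) goes away. For intuition on why the old restriction was overly strict: DISTINCT applied on top of a GROUP BY's output still deduplicates rows whenever the select list leaves out some group keys. A plain-Java sketch of the semantics, with made-up sample counts and no Hive APIs involved:

    import java.util.Arrays;
    import java.util.LinkedHashSet;
    import java.util.List;

    public class DistinctAfterGroupBy {
      public static void main(String[] args) {
        // Say "select count(*) from src1 group by key" yields one count per key:
        List<Long> countsPerKey = Arrays.asList(1L, 2L, 1L, 3L, 2L);
        // "select distinct count(*) from src1 group by key" dedups that column:
        System.out.println(new LinkedHashSet<>(countsPerKey)); // prints [1, 2, 3]
      }
    }
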
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index d6695cc..205da71 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2877,6 +2877,18 @@ private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver
     return aInfo;
   }
 
+  private void expandSelectStar(QBParseInfo qbp, String dest, RowResolver inputRR) {
+    final ASTNode selExprList = qbp.getSelForClause(dest);
+    if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
+        && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) {
+      ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
+      if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+        ASTNode newSelExprList = genSelectDIAST(inputRR);
+        qbp.setSelExprForClause(dest, newSelExprList);
+      }
+    }
+  }
+
   /**
    * Generate GB plan.
    *
@@ -2901,16 +2913,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
     // SEL%SEL% rule.
     ASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName);
     SubQueryUtils.checkForTopLevelSubqueries(selExprList);
-    if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
-        && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) {
-      ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
-      if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
-        // As we said before, here we use genSelectLogicalPlan to rewrite AllColRef
-        srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null, null, true).getKey();
-        RowResolver rr = this.relToHiveRR.get(srcRel);
-        qbp.setSelExprForClause(detsClauseName, SemanticAnalyzer.genSelectDIAST(rr));
-      }
-    }
+    expandSelectStar(qbp, detsClauseName, relToHiveRR.get(srcRel));
 
     // Select DISTINCT + windowing; GBy handled by genSelectForWindowing
     if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI &&
@@ -3057,8 +3060,37 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
       this.relToHiveRR.put(gbRel, groupByOutputRowResolver);
     }
 
     return gbRel;
   }
 
+  private Pair<RelNode, RowResolver> genGBSelectDistinctPlan(
+      Pair<RelNode, RowResolver> srcNodeRR) throws SemanticException {
+    final RelNode srcRel = srcNodeRR.left;
+
+    // This comes from genSelectLogicalPlan, so it must be a Project.
+    // assert srcRel instanceof HiveProject;
+
+    RowResolver inputRR = srcNodeRR.right;
+    if (inputRR == null) {
+      inputRR = relToHiveRR.get(srcRel);
+    }
+    final RowResolver outputRR = inputRR;
+
+    // DISTINCT: group on every projected column, with no aggregate calls.
+    final List<Integer> groupSetPositions = Lists.newArrayList();
+    final RelDataType inputRT = srcRel.getRowType();
+    for (int i = 0; i < inputRT.getFieldCount(); i++) {
+      groupSetPositions.add(i);
+    }
+
+    HiveAggregate distAgg = new HiveAggregate(
+        cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+        srcRel, false,
+        ImmutableBitSet.of(groupSetPositions),
+        null, new ArrayList<AggregateCall>());
+    relToHiveRR.put(distAgg, outputRR);
+    return new Pair<RelNode, RowResolver>(distAgg, outputRR);
+  }
+
   /**
    * Generate OB RelNode and input Select RelNode that should be used to
@@ -3608,6 +3640,22 @@ private void setQueryHints(QB qb) throws SemanticException {
     }
   }
 
+  private Pair<RelNode, RowResolver> genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
+      ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
+      throws SemanticException {
+    QBParseInfo qbp = getQBParseInfo(qb);
+    String selClauseName = qbp.getClauseNames().iterator().next();
+    ASTNode selExprList = qbp.getSelForClause(selClauseName);
+
+    Pair<RelNode, RowResolver> retNodeRR = internalGenSelectLogicalPlan(
+        qb, srcRel, starSrcRel, outerNameToPosMap, outerRR, isAllColRefRewrite);
+
+    if (selExprList.getType() == HiveParser.TOK_SELECTDI) {
+      retNodeRR = genGBSelectDistinctPlan(retNodeRR);
+    }
+    return retNodeRR;
+  }
+
   /**
    * NOTE: there can only be one select caluse since we don't handle multi
    * destination insert.
@@ -3626,7 +3674,7 @@ private void setQueryHints(QB qb) throws SemanticException {
    * @return RelNode: the select relnode RowResolver: i.e., originalRR, the RR after select when there is an order by.
    * @throws SemanticException
    */
-  private Pair<RelNode, RowResolver> genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
+  private Pair<RelNode, RowResolver> internalGenSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
       ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
       throws SemanticException {
     // 0. Generate a Select Node for Windowing
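
Note: genGBSelectDistinctPlan implements DISTINCT as a HiveAggregate that groups on every column of its input and carries no aggregate calls. That is the same construction plain Calcite performs in RelBuilder.distinct(), which may be a useful mental model. A minimal sketch under that analogy (this is Calcite, not Hive, code; the table name "emps" is an assumed sample schema):

    import org.apache.calcite.rel.RelNode;
    import org.apache.calcite.tools.FrameworkConfig;
    import org.apache.calcite.tools.RelBuilder;

    public class DistinctAsAggregate {
      // Plan shape for: select distinct deptno, count(*) from emps group by deptno
      public static RelNode distinctOverGroupBy(FrameworkConfig config) {
        RelBuilder b = RelBuilder.create(config);
        return b.scan("emps")
            // inner aggregate: the explicit GROUP BY with its count
            .aggregate(b.groupKey("deptno"), b.count(false, "c"))
            // outer aggregate: group on all columns, no calls -- the DISTINCT
            .distinct()
            .build();
      }
    }
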
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 4faec05..f51ff04 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1634,10 +1634,6 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
       if (qbp.getJoinExpr() != null) {
         queryProperties.setHasJoinFollowedByGroupBy(true);
       }
-      if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
-        throw new SemanticException(generateErrorMessage(ast,
-            ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
-      }
       qbp.setGroupByExprForClause(ctx_1.dest, ast);
       skipRecursion = true;
 
@@ -4056,40 +4052,46 @@ public static int unsetBit(int bitmap, int bitIdx) {
   }
 
   /**
-   * This function is a wrapper of parseInfo.getGroupByForClause which
-   * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
-   * a,b,c.
+   * This function returns the GROUP BY expressions for the given clause, if present.
+   * A DISTINCT, if present, is handled later, when the SELECT is generated.
    */
   List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException {
-    if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
-      ASTNode selectExprs = parseInfo.getSelForClause(dest);
-      List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
-          : selectExprs.getChildCount());
-      if (selectExprs != null) {
-        for (int i = 0; i < selectExprs.getChildCount(); ++i) {
-          if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
-            continue;
-          }
-          // table.column AS alias
-          ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
-          result.add(grpbyExpr);
-        }
-      }
-      return result;
-    } else {
-      ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
-      List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0
-          : grpByExprs.getChildCount());
-      if (grpByExprs != null) {
-        for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
-          ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
-          if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
-            result.add(grpbyExpr);
-          }
-        }
-      }
-      return result;
-    }
+    List<ASTNode> result;
+    // When *not* invoked from CalcitePlanner, translate the DISTINCT into a GROUP BY;
+    // CBO handles the DISTINCT in CalcitePlannerAction.genSelectLogicalPlan.
+    if (!(this instanceof CalcitePlanner)) {
+      if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
+        ASTNode selectExprs = parseInfo.getSelForClause(dest);
+        result = new ArrayList<ASTNode>(selectExprs == null ? 0
+            : selectExprs.getChildCount());
+        if (selectExprs != null) {
+          for (int i = 0; i < selectExprs.getChildCount(); ++i) {
+            if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
+              continue;
+            }
+            // table.column AS alias
+            ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
+            result.add(grpbyExpr);
+          }
+        }
+        return result;
+      }
+    }
+
+    // Look for a true GROUP BY.
+    ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
+    if (grpByExprs != null) {
+      result = new ArrayList<ASTNode>(grpByExprs.getChildCount());
+      for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
+        ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
+        if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
+          result.add(grpbyExpr);
+        }
+      }
+    } else {
+      result = new ArrayList<ASTNode>(0);
+    }
+    return result;
   }
 
   static String[] getColAlias(ASTNode selExpr, String defaultName,
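
Note: with CBO off, the old rewrite survives unchanged in the branch above: every non-hint expression of a DISTINCT select list becomes a group-by key. A hypothetical stand-alone model of that contract (groupByKeys and the string-based hint check are illustrative inventions, not Hive code):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class DistinctToGroupBy {
      // "select distinct a, b from t" is planned like "select a, b from t group by a, b".
      static List<String> groupByKeys(List<String> distinctSelectExprs) {
        List<String> keys = new ArrayList<>(distinctSelectExprs.size());
        for (String expr : distinctSelectExprs) {
          if (!expr.startsWith("/*+")) { // query hints are skipped, as in the loop above
            keys.add(expr);
          }
        }
        return keys;
      }

      public static void main(String[] args) {
        System.out.println(groupByKeys(Arrays.asList("a", "b"))); // [a, b]
      }
    }
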
diff --git a/ql/src/test/queries/clientpositive/distinct_gby.q b/ql/src/test/queries/clientpositive/distinct_gby.q
new file mode 100644
index 0000000..182ce83
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_gby.q
@@ -0,0 +1,36 @@
+explain select distinct key from src1;
+
+explain select distinct * from src1;
+
+explain select distinct count(*) from src1 where key in (1,2,3);
+
+explain select distinct count(*) from src1 where key in (1,2,3) group by key;
+
+explain select distinct key, count(*) from src1 where key in (1,2,3) group by key;
+
+explain select distinct * from (select * from src1) as T;
+
+explain select distinct * from (select count(*) from src1) as T;
+
+explain select distinct * from (select * from src1 where key in (1,2,3)) as T;
+
+explain select distinct * from (select count(*) from src1 where key in (1,2,3)) as T;
+
+explain select distinct * from (select distinct count(*) from src1 where key in (1,2,3)) as T;
+
+explain select distinct sum(value) over () from src1;
+
+explain select distinct * from (select sum(value) over () from src1) as T;
+
+explain select distinct count(*)+1 from src1;
+
+explain select distinct count(*)+key from src1 group by key;
+
+explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key;
+
+explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key group by a.key;
+
+-- should not project the virtual BLOCK__OFFSET__INSIDE__FILE et al. columns
+explain select distinct * from (select distinct * from src1) as T;
+
+
diff --git a/ql/src/test/results/clientpositive/distinct_gby.q.out b/ql/src/test/results/clientpositive/distinct_gby.q.out
new file mode 100644
index 0000000..5f918d0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/distinct_gby.q.out
@@ -0,0 +1,1078 @@
+PREHOOK: query: explain select distinct key from src1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct key from src1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic
stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from src1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from src1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint), KEY._col3 (type: string), KEY._col4 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(*) from src1 where key in (1,2,3) +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(*) from src1 where key in (1,2,3) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(*) from src1 where key in (1,2,3) group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(*) from src1 where key in (1,2,3) group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink 
+ +PREHOOK: query: explain select distinct key, count(*) from src1 where key in (1,2,3) group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct key, count(*) from src1 where key in (1,2,3) group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select * from src1) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select * from src1) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: explain select distinct * from (select count(*) from src1) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select count(*) from src1) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select * from src1 where key in (1,2,3)) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select * from src1 where key in (1,2,3)) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select count(*) from src1 where key in (1,2,3)) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select count(*) from src1 where key in (1,2,3)) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (1,2,3)) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (1,2,3)) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct sum(value) over () from src1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct sum(value) over () from src1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col1 + name: sum + window function: GenericUDAFSumDouble + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_0 (type: double) + 
outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select sum(value) over () from src1) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select sum(value) over () from src1) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col1 + name: sum + window function: GenericUDAFSumDouble + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + 
Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(*)+1 from src1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(*)+1 from src1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 + 1) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(*)+key from src1 group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(*)+key from src1 group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: 
string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToDouble(_col1) + UDFToDouble(_col0)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: b + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1), count(_col3) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key group by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key group by a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: b + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort 
order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1), count(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: bigint), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select distinct * from src1) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select distinct * from src1) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint), KEY._col3 (type: string), KEY._col4 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +
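
Note: in the last plan above, "select distinct * from (select distinct * from src1)" still executes two back-to-back Group By operators over identical keys (mergepartial, then complete). A possible follow-up, not attempted in this patch, would be to fold an Aggregate whose input is already distinct. Plain Calcite ships AggregateRemoveRule for exactly that shape; the INSTANCE singleton below matches Calcite versions of this era (newer Calcite exposes it as CoreRules.AGGREGATE_REMOVE), so treat this as an assumption-laden sketch of wiring it into a HEP program:

    import org.apache.calcite.plan.hep.HepProgram;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.rules.AggregateRemoveRule;

    public class FoldRedundantDistinct {
      // Build a HEP program that drops an Aggregate whose input is already distinct.
      public static HepProgram program() {
        return new HepProgramBuilder()
            .addRuleInstance(AggregateRemoveRule.INSTANCE)
            .build();
      }
    }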