diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 6087e02..9936891 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -93,7 +93,7 @@
   INVALID_MAPINDEX_CONSTANT(10031, "Non-constant expression for map indexes not supported"),
   INVALID_MAPINDEX_TYPE(10032, "MAP key type does not match index expression type"),
   NON_COLLECTION_TYPE(10033, "[] not valid on non-collection types"),
-  SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"),
+  @Deprecated SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"),
   COLUMN_REPEATED_IN_PARTITIONING_COLS(10035, "Column repeated in partitioning columns"),
   DUPLICATE_COLUMN_NAMES(10036, "Duplicate column name:"),
   INVALID_BUCKET_NUMBER(10037, "Bucket number should be bigger than zero"),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 3520d90..706fd54 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -3237,6 +3237,18 @@ private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver
     return aInfo;
   }

+  private void expandSelectStar(QBParseInfo qbp, String dest, RowResolver inputRR, RelNode srcRel) {
+    final ASTNode selExprList = qbp.getSelForClause(dest);
+    if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
+        && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) {
+      ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
+      if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+        ASTNode newSelExprList = genSelectDIAST(inputRR);
+        qbp.setSelExprForClause(dest, newSelExprList);
+      }
+    }
+  }
+
   /**
    * Generate GB plan.
    *
@@ -3261,16 +3273,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
     // SEL%SEL% rule.
     ASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName);
     SubQueryUtils.checkForTopLevelSubqueries(selExprList);
-    if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
-        && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) {
-      ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
-      if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
-        // As we said before, here we use genSelectLogicalPlan to rewrite AllColRef
-        srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null, null, true).getKey();
-        RowResolver rr = this.relToHiveRR.get(srcRel);
-        qbp.setSelExprForClause(detsClauseName, SemanticAnalyzer.genSelectDIAST(rr));
-      }
-    }
+    expandSelectStar(qbp, detsClauseName, relToHiveRR.get(srcRel), srcRel);

     // Select DISTINCT + windowing; GBy handled by genSelectForWindowing
     if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI &&
@@ -3398,9 +3401,42 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
       this.relToHiveRR.put(gbRel, groupByOutputRowResolver);
     }
-    return gbRel;
+    return gbRel;
+  }
+
+  private Pair<RelNode, RowResolver> genGBSelectDistinctPlan(
+      QBParseInfo qbp, String dest, Pair<RelNode, RowResolver> srcNodeRR) throws SemanticException {
+    final RelNode srcRel = srcNodeRR.left;
+
+    // This comes from genSelectLogicalPlan, must be a project
+    // assert srcRel instanceof HiveProject;
+
+    RowResolver inputRR = srcNodeRR.right;
+    if (inputRR == null) {
+      inputRR = relToHiveRR.get(srcRel);
+    }
+    final RowResolver outputRR = inputRR;
+
+    final List<Integer> groupSetPositions = Lists.newArrayList();
+    final RelDataType inputRT = srcRel.getRowType();
+    for (int idx = 0; idx < inputRT.getFieldCount(); ++idx) {
+      groupSetPositions.add(idx);
+    }
+    HiveAggregate distAgg = new HiveAggregate(
+        cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+        srcRel,
+        ImmutableBitSet.of(groupSetPositions),
+        null, new ArrayList<AggregateCall>());
+    relToHiveRR.put(distAgg, outputRR);
+    relToHiveColNameCalcitePosMap.put(distAgg,
+        relToHiveColNameCalcitePosMap.get(srcRel));
+    return new Pair<RelNode, RowResolver>(distAgg, outputRR);
+  }
+
   /**
    * Generate OB RelNode and input Select RelNode that should be used to
    * introduce top constraining Project. If Input select RelNode is not
@@ -3949,6 +3985,22 @@ private void setQueryHints(QB qb) throws SemanticException {
     }
   }

+  private Pair<RelNode, RowResolver> genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
+      ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR,
+      boolean isAllColRefRewrite) throws SemanticException {
+    QBParseInfo qbp = getQBParseInfo(qb);
+    String selClauseName = qbp.getClauseNames().iterator().next();
+    ASTNode selExprList = qbp.getSelForClause(selClauseName);
+
+    Pair<RelNode, RowResolver> retNodeRR = internalGenSelectLogicalPlan(
+        qb, srcRel, starSrcRel, outerNameToPosMap, outerRR, isAllColRefRewrite);
+
+    if (selExprList.getType() == HiveParser.TOK_SELECTDI) {
+      retNodeRR = genGBSelectDistinctPlan(qbp, selClauseName, retNodeRR);
+    }
+    return retNodeRR;
+  }
+
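The wrapper above is the core of the CBO change: when the select clause is TOK_SELECTDI, the AST is no longer rejected or rewritten; an aggregate that groups on every projected column is stacked on top of the Select. Below is a minimal sketch of the same plan shape using Calcite's stock RelBuilder rather than the patch's HiveAggregate; the table t(a, b) and the FrameworkConfig are illustrative assumptions, not part of the patch:

    import org.apache.calcite.rel.RelNode;
    import org.apache.calcite.tools.FrameworkConfig;
    import org.apache.calcite.tools.RelBuilder;

    public class DistinctAsAggregate {
      // SELECT DISTINCT a, b FROM t  ==>  Aggregate(groupSet = {0, 1}, no agg calls)
      public static RelNode build(FrameworkConfig config) {
        // config is assumed to expose a table t(a, b)
        RelBuilder b = RelBuilder.create(config);
        return b.scan("t")
            .project(b.field("a"), b.field("b"))  // the SELECT list
            .distinct()                           // group on all projected fields
            .build();
      }
    }

RelBuilder.distinct() produces exactly an Aggregate whose group set covers all input fields and whose aggregate-call list is empty, which is what genGBSelectDistinctPlan constructs by hand.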
   /**
    * NOTE: there can only be one select clause since we don't handle multi
    * destination insert.
    *
    * @return RelNode: the select relnode. RowResolver: i.e., originalRR, the RR after select when there is an order by.
    * @throws SemanticException
    */
-  private Pair<RelNode, RowResolver> genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
+  private Pair<RelNode, RowResolver> internalGenSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
       ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR,
       boolean isAllColRefRewrite) throws SemanticException {
     // 0. Generate a Select Node for Windowing
@@ -4240,7 +4292,13 @@ private void setQueryHints(QB qb) throws SemanticException {
     // TODO: support unselected columns in genericUDTF and windowing functions.
     // We examine the order by in this query block and adds in column needed
     // by order by in select list.
-    if (obAST != null && !(selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI) && !isAllColRefRewrite) {
+    //
+    // If DISTINCT is present, it is not possible to ORDER BY unselected
+    // columns, and in fact adding all columns would change the behavior of
+    // DISTINCT, so we bypass this logic.
+    if (obAST != null
+        && selExprList.getToken().getType() != HiveParser.TOK_SELECTDI
+        && !isAllColRefRewrite) {
       // 1. OB Expr sanity test
       // in strict mode, in the presence of order by, limit must be
       // specified
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index cd6f1ee..4e346bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1510,10 +1510,6 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
       if (qbp.getJoinExpr() != null) {
         queryProperties.setHasJoinFollowedByGroupBy(true);
       }
-      if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
-        throw new SemanticException(generateErrorMessage(ast,
-            ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
-      }
       qbp.setGroupByExprForClause(ctx_1.dest, ast);
       skipRecursion = true;
@@ -3939,40 +3935,47 @@ public static long unsetBit(long bitmap, int bitIdx) {
   }

   /**
-   * This function is a wrapper of parseInfo.getGroupByForClause which
-   * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
-   * a,b,c.
+   * Returns the GBY, if present;
+   * DISTINCT, if present, will be handled when generating the SELECT.
    */
   List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException {
-    if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
-      ASTNode selectExprs = parseInfo.getSelForClause(dest);
-      List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
-          : selectExprs.getChildCount());
-      if (selectExprs != null) {
-        for (int i = 0; i < selectExprs.getChildCount(); ++i) {
-          if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
-            continue;
+    final List<ASTNode> result;
+    // When *not* invoked by CalcitePlanner, return the DISTINCT as a GBY
+    // CBO will handle the DISTINCT in CalcitePlannerAction.genSelectLogicalPlan
+    //
+    if (!(this instanceof CalcitePlanner)) {
+      if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
+        ASTNode selectExprs = parseInfo.getSelForClause(dest);
+        result = new ArrayList<ASTNode>(selectExprs == null ? 0
+            : selectExprs.getChildCount());
+        if (selectExprs != null) {
+          for (int i = 0; i < selectExprs.getChildCount(); ++i) {
+            if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
+              continue;
+            }
+            // table.column AS alias
+            ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
+            result.add(grpbyExpr);
           }
-        // table.column AS alias
-        ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
-        result.add(grpbyExpr);
         }
+        return result;
       }
-      return result;
-    } else {
-      ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
-      List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0
-          : grpByExprs.getChildCount());
-      if (grpByExprs != null) {
-        for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
-          ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
-          if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
-            result.add(grpbyExpr);
-          }
+    }
+
+    // look for a true GBY
+    ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
+    if (grpByExprs != null) {
+      result = new ArrayList<ASTNode>(grpByExprs.getChildCount());
+      for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
+        ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
+        if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
+          result.add(grpbyExpr);
+        }
       }
-      return result;
+    } else {
+      result = new ArrayList<ASTNode>(0);
     }
+    return result;
   }

   static String[] getColAlias(ASTNode selExpr, String defaultName,
diff --git a/ql/src/test/queries/clientpositive/distinct_gby.q b/ql/src/test/queries/clientpositive/distinct_gby.q
new file mode 100644
index 0000000..182ce83
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_gby.q
@@ -0,0 +1,36 @@
+explain select distinct key from src1;
+
+explain select distinct * from src1;
+
+explain select distinct count(*) from src1 where key in (1,2,3);
+
+explain select distinct count(*) from src1 where key in (1,2,3) group by key;
+
+explain select distinct key, count(*) from src1 where key in (1,2,3) group by key;
+
+explain select distinct * from (select * from src1) as T;
+
+explain select distinct * from (select count(*) from src1) as T;
+
+explain select distinct * from (select * from src1 where key in (1,2,3)) as T;
+
+explain select distinct * from (select count(*) from src1 where key in (1,2,3)) as T;
+
+explain select distinct * from (select distinct count(*) from src1 where key in (1,2,3)) as T;
+
+explain select distinct sum(value) over () from src1;
+
+explain select distinct * from (select sum(value) over () from src1) as T;
+
+explain select distinct count(*)+1 from src1;
+
+explain select distinct count(*)+key from src1 group by key;
+
+explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key;
+
+explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key group by a.key;
+
+-- should not project the virtual BLOCK_OFFSET et al. columns
+explain select distinct * from (select distinct * from src1) as T;
+
+
diff --git a/ql/src/test/queries/clientpositive/having2.q b/ql/src/test/queries/clientpositive/having2.q
index 7b35365..be3cc0e 100644
--- a/ql/src/test/queries/clientpositive/having2.q
+++ b/ql/src/test/queries/clientpositive/having2.q
@@ -84,7 +84,7 @@ AND (COUNT(s2.value) > 4) );
 explain
-SELECT distinct s1.customer_name as x, s1.customer_name as y
+SELECT distinct COUNT(*) AS c
 FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
 HAVING (
 (SUM(s1.customer_balance) <= 4074689.000000041)
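Every query in the new distinct_gby.q test above was rejected with SELECT_DISTINCT_WITH_GROUPBY before this patch. For the DISTINCT-over-GROUP-BY case, the plan that the .q.out below exhibits as two chained map-reduce stages is one aggregate stacked on another; a hedged RelBuilder sketch of that shape follows, where src1(key, value) and the FrameworkConfig are illustrative assumptions:

    import org.apache.calcite.rel.RelNode;
    import org.apache.calcite.tools.FrameworkConfig;
    import org.apache.calcite.tools.RelBuilder;

    public class DistinctOverGroupBy {
      // SELECT DISTINCT count(*) FROM src1 GROUP BY key:
      // Aggregate(GROUP BY key, count(*)) -> Project(count only) -> Aggregate(DISTINCT)
      public static RelNode build(FrameworkConfig config) {
        RelBuilder b = RelBuilder.create(config);
        return b.scan("src1")
            .aggregate(b.groupKey("key"),
                       b.count(false, "c"))  // the explicit GROUP BY and count(*)
            .project(b.field("c"))           // the SELECT list drops the grouping key
            .distinct()                      // DISTINCT = group on all projected fields
            .build();
      }
    }

diff --git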
a/ql/src/test/queries/negative/wrong_distinct1.q b/ql/src/test/queries/negative/wrong_distinct1.q deleted file mode 100755 index d92c3bb..0000000 --- a/ql/src/test/queries/negative/wrong_distinct1.q +++ /dev/null @@ -1,2 +0,0 @@ -FROM src -INSERT OVERWRITE TABLE dest1 SELECT DISTINCT src.key, substr(src.value,4,1) GROUP BY src.key diff --git a/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out b/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out index b332293..e4f6b58 100644 --- a/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out +++ b/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out @@ -1 +1 @@ -FAILED: SemanticException 3:36 SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token 'key' +FAILED: SemanticException Line 0:-1 Expression not in GROUP BY key 'value' diff --git a/ql/src/test/results/clientnegative/udaf_invalid_place.q.out b/ql/src/test/results/clientnegative/udaf_invalid_place.q.out index 6727889..8b5e90f 100644 --- a/ql/src/test/results/clientnegative/udaf_invalid_place.q.out +++ b/ql/src/test/results/clientnegative/udaf_invalid_place.q.out @@ -1 +1 @@ -FAILED: SemanticException [Error 10128]: Line 1:21 Not yet supported place for UDAF 'sum' +FAILED: SemanticException [Error 10025]: Line 1:16 Expression not in GROUP BY key 'key' diff --git a/ql/src/test/results/clientpositive/distinct_gby.q.out b/ql/src/test/results/clientpositive/distinct_gby.q.out new file mode 100644 index 0000000..cf28bf2 --- /dev/null +++ b/ql/src/test/results/clientpositive/distinct_gby.q.out @@ -0,0 +1,1078 @@ +PREHOOK: query: explain select distinct key from src1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct key from src1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from src1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from src1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 
Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint), KEY._col3 (type: string), KEY._col4 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(*) from src1 where key in (1,2,3) +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(*) from src1 where key in (1,2,3) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(*) from src1 where key in (1,2,3) group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(*) from src1 where key in (1,2,3) group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: 
Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct key, count(*) from src1 where key in (1,2,3) group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct key, count(*) from src1 where key in (1,2,3) group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select * from src1) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select * from src1) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select count(*) from src1) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select count(*) from src1) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select * from src1 where key in (1,2,3)) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select * from src1 where key in (1,2,3)) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + 
Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select count(*) from src1 where key in (1,2,3)) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select count(*) from src1 where key in (1,2,3)) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (1,2,3)) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (1,2,3)) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key)) IN (1.0, 2.0, 3.0) (type: boolean) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: 
Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct sum(value) over () from src1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct sum(value) over () from src1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col1 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select sum(value) over () from src1) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select sum(value) over () from src1) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col1 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(*)+1 from src1 +PREHOOK: type: QUERY +POSTHOOK: query: 
explain select distinct count(*)+1 from src1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 + 1) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(*)+key from src1 group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(*)+key from src1 group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToDouble(_col1) + UDFToDouble(_col0)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: b + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1), count(_col3) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map 
Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key group by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src1 a join src1 b on a.key=b.key group by a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: b + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1), count(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + 
Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: bigint), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select distinct * from (select distinct * from src1) as T +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct * from (select distinct * from src1) as T +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint), 
KEY._col3 (type: string), KEY._col4 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: struct) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/having2.q.out b/ql/src/test/results/clientpositive/having2.q.out index 67f8af8..745102d 100644 --- a/ql/src/test/results/clientpositive/having2.q.out +++ b/ql/src/test/results/clientpositive/having2.q.out @@ -485,7 +485,7 @@ STAGE PLANS: ListSink PREHOOK: query: explain -SELECT distinct s1.customer_name as x, s1.customer_name as y +SELECT distinct COUNT(*) AS c FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key HAVING ( (SUM(s1.customer_balance) <= 4074689.000000041) @@ -494,7 +494,7 @@ AND (COUNT(s2.value) > 4) ) PREHOOK: type: QUERY POSTHOOK: query: explain -SELECT distinct s1.customer_name as x, s1.customer_name as y +SELECT distinct COUNT(*) AS c FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key HAVING ( (SUM(s1.customer_balance) <= 4074689.000000041) @@ -550,14 +550,13 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 + outputColumnNames: _col0, _col2, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: sum(_col2), avg(_col0), count(_col4) - keys: _col1 (type: string) + aggregations: count(), sum(_col2), avg(_col0), count(_col4) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -570,36 +569,29 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: bigint) + sort order: + Statistics: Num rows: 1 Data size: 104 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2) - keys: KEY._col0 (type: string) + aggregations: count(VALUE._col0), sum(VALUE._col1), avg(VALUE._col2), count(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: double), _col3 (type: bigint) - 
outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((_col2 <= 4074689.000000041) and (_col3 <= 822.0) and (_col4 > 4)) (type: boolean) - Statistics: Num rows: 10 Data size: 106 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 106 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 106 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 104 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((_col1 <= 4074689.000000041) and (_col2 <= 822.0) and (_col3 > 4)) (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 104 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 104 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -607,3 +599,171 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: DROP TABLE IF EXISTS src +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src +PREHOOK: Output: default@src +POSTHOOK: query: DROP TABLE IF EXISTS src +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src +PREHOOK: query: DROP TABLE IF EXISTS src1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src1 +POSTHOOK: query: DROP TABLE IF EXISTS src1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src1 +PREHOOK: query: DROP TABLE IF EXISTS src_json +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_json +PREHOOK: Output: default@src_json +POSTHOOK: query: DROP TABLE IF EXISTS src_json +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_json +POSTHOOK: Output: default@src_json +PREHOOK: query: DROP TABLE IF EXISTS src_sequencefile +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_sequencefile +PREHOOK: Output: default@src_sequencefile +POSTHOOK: query: DROP TABLE IF EXISTS src_sequencefile +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_sequencefile +POSTHOOK: Output: default@src_sequencefile +PREHOOK: query: DROP TABLE IF EXISTS src_thrift +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_thrift +PREHOOK: Output: default@src_thrift +POSTHOOK: query: DROP TABLE IF EXISTS src_thrift +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_thrift +POSTHOOK: Output: default@src_thrift +PREHOOK: query: DROP TABLE IF EXISTS srcbucket +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcbucket +PREHOOK: Output: default@srcbucket +POSTHOOK: query: DROP TABLE IF EXISTS srcbucket +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcbucket +POSTHOOK: Output: default@srcbucket +PREHOOK: query: DROP TABLE IF EXISTS srcbucket2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@srcbucket2 +PREHOOK: Output: default@srcbucket2 +POSTHOOK: query: DROP TABLE IF EXISTS srcbucket2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcbucket2 +POSTHOOK: Output: default@srcbucket2 +PREHOOK: query: DROP TABLE IF EXISTS srcpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart +PREHOOK: Output: default@srcpart +POSTHOOK: query: DROP TABLE IF EXISTS srcpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart +POSTHOOK: Output: default@srcpart +PREHOOK: query: DROP TABLE IF EXISTS primitives +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS primitives +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest4_sequencefile +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest4_sequencefile +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_j1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_j1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_g1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_g1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_g2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_g2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS fetchtask_ioexception +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS fetchtask_ioexception +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS alltypesorc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc +POSTHOOK: query: DROP TABLE IF EXISTS alltypesorc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc +PREHOOK: query: DROP TABLE IF EXISTS alltypesparquet +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypesparquet +PREHOOK: Output: default@alltypesparquet +POSTHOOK: query: DROP TABLE IF EXISTS alltypesparquet +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypesparquet +POSTHOOK: Output: default@alltypesparquet +PREHOOK: query: DROP TABLE IF EXISTS cbo_t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t1 +PREHOOK: Output: default@cbo_t1 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t1 +POSTHOOK: Output: default@cbo_t1 +PREHOOK: query: DROP TABLE IF EXISTS cbo_t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t2 +PREHOOK: Output: default@cbo_t2 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Output: default@cbo_t2 +PREHOOK: query: DROP TABLE IF EXISTS cbo_t3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t3 +PREHOOK: Output: default@cbo_t3 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t3 +POSTHOOK: Output: default@cbo_t3 +PREHOOK: query: DROP TABLE IF EXISTS src_cbo +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@src_cbo +PREHOOK: Output: default@src_cbo +POSTHOOK: query: DROP TABLE IF EXISTS src_cbo +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_cbo +POSTHOOK: Output: default@src_cbo +PREHOOK: query: DROP TABLE IF EXISTS part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part +PREHOOK: Output: default@part +POSTHOOK: query: DROP TABLE IF EXISTS part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE IF EXISTS lineitem +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@lineitem +PREHOOK: Output: default@lineitem +POSTHOOK: query: DROP TABLE IF EXISTS lineitem +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@lineitem +POSTHOOK: Output: default@lineitem diff --git a/ql/src/test/results/compiler/errors/wrong_distinct1.q.out b/ql/src/test/results/compiler/errors/wrong_distinct1.q.out deleted file mode 100644 index 11f48a2..0000000 --- a/ql/src/test/results/compiler/errors/wrong_distinct1.q.out +++ /dev/null @@ -1,2 +0,0 @@ -Semantic Exception: -2:88 SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token 'key' \ No newline at end of file
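For the non-CBO path, getGroupByForClause above still performs the classic rewrite: SELECT DISTINCT a, b, c is planned as SELECT a, b, c GROUP BY a, b, c, and only CalcitePlanner defers the DISTINCT to genSelectLogicalPlan. A dependency-free sketch of that decision, using illustrative names rather than Hive's ASTNode API:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class DistinctRewriteSketch {
      // If the select is DISTINCT, every select expression becomes a grouping
      // key; otherwise the explicit GROUP BY list (possibly empty) is used.
      static List<String> groupByFor(boolean isSelectDistinct,
                                     List<String> selectExprs,
                                     List<String> groupByExprs) {
        if (isSelectDistinct) {
          return new ArrayList<>(selectExprs);
        }
        return groupByExprs == null ? new ArrayList<>() : new ArrayList<>(groupByExprs);
      }

      public static void main(String[] args) {
        // prints [a, b, c]: DISTINCT select list reused as the GROUP BY keys
        System.out.println(groupByFor(true, Arrays.asList("a", "b", "c"), null));
      }
    }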