From b03be9dd28c6688bee7474670a148891884d1d1d Mon Sep 17 00:00:00 2001
From: Ashutosh Chauhan
Date: Mon, 18 May 2015 11:11:51 -0700
Subject: [PATCH] HIVE-10741 : count distinct rewrite is not firing

The count distinct rewrite is applied on the CBO path, but
canHandleQbForCbo() skipped CBO for top-level query blocks with fewer
than two joins, so a plain "select count(distinct ...)" never went
through Calcite and the rewrite never fired. Pass the QB down to
canHandleQbForCbo() and treat the presence of distinct expressions
(distinctExprsExists(), now static so CalcitePlanner can call it) as
reason enough to use CBO. Spark golden files are updated for the
resulting plan changes.
---
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  8 ++--
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     | 28 +++++++-------
 .../results/clientpositive/spark/auto_join32.q.out | 44 +++++++++++++---------
 .../test/results/clientpositive/spark/count.q.out  | 10 ++---
 .../results/clientpositive/spark/groupby2.q.out    |  8 ++--
 .../results/clientpositive/spark/groupby3.q.out    |  8 ++--
 .../clientpositive/spark/groupby3_map.q.out        |  8 ++--
 .../spark/groupby3_map_multi_distinct.q.out        |  8 ++--
 .../clientpositive/spark/groupby3_map_skew.q.out   |  8 ++--
 .../clientpositive/spark/groupby3_noskew.q.out     |  6 +--
 .../spark/groupby3_noskew_multi_distinct.q.out     |  6 +--
 .../clientpositive/spark/groupby_map_ppr.q.out     |  8 ++--
 .../spark/groupby_map_ppr_multi_distinct.q.out     |  8 ++--
 .../results/clientpositive/spark/groupby_ppr.q.out |  8 ++--
 .../clientpositive/spark/limit_pushdown.q.out      | 12 +++---
 .../spark/vector_count_distinct.q.out              |  6 +--
 16 files changed, 96 insertions(+), 88 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c412561..4760a22 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -379,7 +379,7 @@ boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) {
     }
     // Now check QB in more detail. canHandleQbForCbo returns null if query can
     // be handled.
-    String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage);
+    String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage, qb);
     if (msg == null) {
       return true;
     }
@@ -408,11 +408,11 @@ boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) {
    *           2. Nested Subquery will return false for qbToChk.getIsQuery()
    */
   static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
-      boolean topLevelQB, boolean verbose) {
+      boolean topLevelQB, boolean verbose, QB qb) {
     boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST);
     boolean isStrictTest = isInTest
         && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict");
-    boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest;
+    boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest || distinctExprsExists(qb);
     if (!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy()
         && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy()
         && !queryProperties.hasPTF() && !queryProperties.usesScript()
@@ -2711,7 +2711,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept
 
       // 0. Check if we can handle the SubQuery;
       // canHandleQbForCbo returns null if the query can be handled.
-      String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled());
+      String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled(), qb);
       if (reason != null) {
         String msg = "CBO can not handle Sub Query";
         if (LOG.isDebugEnabled()) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 30c87ad..086d9a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -233,7 +233,7 @@
   private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
   private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
   protected HashMap<String, Operator<? extends OperatorDesc>> topOps;
-  private HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
+  private final HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
   protected LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
   private List<LoadTableDesc> loadTableWork;
   private List<LoadFileDesc> loadFileWork;
@@ -294,7 +294,7 @@
 
   /** Not thread-safe. */
   final ASTSearcher astSearcher = new ASTSearcher();
-  
+
   protected AnalyzeRewriteContext analyzeRewrite;
 
   private CreateTableDesc tableDesc;
@@ -1421,7 +1421,7 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
   /**
    * This is phase1 of supporting specifying schema in insert statement
    * insert into foo(z,y) select a,b from bar;
-   * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode) 
+   * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode)
    * @throws SemanticException
    */
   private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException {
@@ -3880,14 +3880,14 @@ static boolean isRegex(String pattern, HiveConf conf) {
    * create table source (a int, b int);
    * create table target (x int, y int, z int);
    * insert into target(z,x) select * from source
-   * 
+   *
    * Once the * is resolved to 'a,b', this list needs to rewritten to 'b,null,a' so that it looks
    * as if the original query was written as
    * insert into target select b, null, a from source
-   * 
+   *
    * if target schema is not specified, this is no-op
-   * 
-   * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx) 
+   *
+   * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
    * @throws SemanticException
    */
   private void handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest,
@@ -3919,7 +3919,7 @@ private void handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest,
     Table target = qb.getMetaData().getDestTableForAlias(dest);
     Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
     if(target == null && partition == null) {
-      throw new SemanticException(generateErrorMessage(selExprList, 
+      throw new SemanticException(generateErrorMessage(selExprList,
         "No table/partition found in QB metadata for dest='" + dest + "'"));
     }
     ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>();
@@ -8581,7 +8581,7 @@ private Operator genSelectAllDesc(Operator input) throws SemanticException {
     }
     RowResolver outputRR = inputRR.duplicate();
     Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
-        new SelectDesc(colList, columnNames, true), 
+        new SelectDesc(colList, columnNames, true),
         outputRR.getRowSchema(), input), outputRR);
     output.setColumnExprMap(columnExprMap);
     return output;
@@ -8742,7 +8742,7 @@ private boolean matchExprLists(List<ExprNodeDesc> list1, List<ExprNodeDesc> list
   }
 
   // see if there are any distinct expressions
-  private boolean distinctExprsExists(QB qb) {
+  protected static boolean distinctExprsExists(QB qb) {
     QBParseInfo qbp = qb.getParseInfo();
 
     TreeSet<String> ks = new TreeSet<String>();
@@ -8997,9 +8997,9 @@ private Operator genUnionPlan(String unionalias, String leftalias,
     if (leftmap.size() != rightmap.size()) {
       throw new SemanticException("Schema of both sides of union should match.");
     }
-    
+
     RowResolver unionoutRR = new RowResolver();
-    
+
     Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator();
     Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator();
     while (lIter.hasNext()) {
@@ -9008,7 +9008,7 @@ private Operator genUnionPlan(String unionalias, String leftalias,
       ColumnInfo lInfo = lEntry.getValue();
       ColumnInfo rInfo = rEntry.getValue();
 
-      String field = lEntry.getKey(); // use left alias (~mysql, postgresql) 
+      String field = lEntry.getKey(); // use left alias (~mysql, postgresql)
       // try widening conversion, otherwise fail union
       TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
           rInfo.getType());
@@ -9158,7 +9158,7 @@ private Operator genUnionPlan(String unionalias, String leftalias,
 
     Iterator<ColumnInfo> oIter = origInputFieldMap.values().iterator();
     Iterator<ColumnInfo> uIter = fieldMap.values().iterator();
-    
+
     List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
     boolean needsCast = false;
     while (oIter.hasNext()) {
diff --git a/ql/src/test/results/clientpositive/spark/auto_join32.q.out b/ql/src/test/results/clientpositive/spark/auto_join32.q.out
index c537b95..361a968 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join32.q.out
@@ -428,30 +428,38 @@ STAGE PLANS:
         Map 1 
            Map Operator Tree:
                TableScan
-                  alias: s
+                  alias: v
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  Filter Operator
-                    predicate: (name is not null and (p = 'bar')) (type: boolean)
+                    predicate: ((p = 'bar') and name is not null) (type: boolean)
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Sorted Merge Bucket Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 name (type: string)
-                        1 name (type: string)
-                      outputColumnNames: _col0, _col9
+                    Select Operator
+                      expressions: name (type: string), registration (type: string)
+                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      Group By Operator
-                        aggregations: count(DISTINCT _col9)
-                        keys: _col0 (type: string), _col9 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2
+                      Sorted Merge Bucket Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col1, _col3
                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: string)
+                        Select Operator
+                          expressions: _col3 (type: string), _col1 (type: string)
+                          outputColumnNames: _col0, _col1
                          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                          Group By Operator
+                            aggregations: count(DISTINCT _col1)
+                            keys: _col0 (type: string), _col1 (type: string)
+                            mode: hash
+                            outputColumnNames: _col0, _col1, _col2
+                            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                            Reduce Output Operator
+                              key expressions: _col0 (type: string), _col1 (type: string)
+                              sort order: ++
+                              Map-reduce partition columns: _col0 (type: string)
+                              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
        Reducer 2 
            Reduce Operator Tree:
              Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/count.q.out b/ql/src/test/results/clientpositive/spark/count.q.out
index 6923a5f..cb9eda5 100644
--- a/ql/src/test/results/clientpositive/spark/count.q.out
+++ b/ql/src/test/results/clientpositive/spark/count.q.out
@@ -123,11 +123,11 @@ STAGE PLANS:
                  Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: a (type: int), b (type: int), c (type: int), d (type: int)
-                    outputColumnNames: a, b, c, d
+                    outputColumnNames: _col1, _col2, _col3, _col4
                    Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: count(1), count(), count(a), count(b), count(c), count(d), count(DISTINCT a), count(DISTINCT b), count(DISTINCT c), count(DISTINCT d), count(DISTINCT a, b), count(DISTINCT b, c), count(DISTINCT c, d), count(DISTINCT a, d), count(DISTINCT a, c), count(DISTINCT b, d), count(DISTINCT a, b, c), count(DISTINCT b, c, d), count(DISTINCT a, c, d), count(DISTINCT a, b, d), count(DISTINCT a, b, c, d)
-                      keys: a (type: int), b (type: int), c (type: int), d (type: int)
+                      aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4)
+                      keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
                      Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
@@ -252,10 +252,10 @@ STAGE PLANS:
                  Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: a (type: int), b (type: int), c (type: int), d (type: int)
-                    outputColumnNames: a, b, c, d
+                    outputColumnNames: _col1, _col2, _col3, _col4
                    Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
-                      key expressions: a (type: int), b (type: int), c (type: int), d (type: int)
+                      key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                      sort order: ++++
                      Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
        Reducer 2 
diff --git a/ql/src/test/results/clientpositive/spark/groupby2.q.out b/ql/src/test/results/clientpositive/spark/groupby2.q.out
index f6be571..f9e3459 100644
--- a/ql/src/test/results/clientpositive/spark/groupby2.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby2.q.out
@@ -32,13 +32,13 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
-                      key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string), _col1 (type: string)
                      sort order: ++
-                      Map-reduce partition columns: substr(key, 1, 1) (type: string)
+                      Map-reduce partition columns: _col0 (type: string)
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
        Reducer 2 
            Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/spark/groupby3.q.out b/ql/src/test/results/clientpositive/spark/groupby3.q.out
index af63c0e..e48018c 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3.q.out
@@ -51,13 +51,13 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
-                      key expressions: substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string)
                      sort order: +
-                      Map-reduce partition columns: substr(value, 5) (type: string)
+                      Map-reduce partition columns: _col0 (type: string)
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
        Reducer 2 
            Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out
index 8379fc9..f806303 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out
@@ -50,12 +50,12 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5))
-                      keys: substr(value, 5) (type: string)
+                      aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0)
+                      keys: _col0 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out
index 5e9d229..3b31dfe 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out
@@ -54,12 +54,12 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)), sum(DISTINCT substr(value, 5)), count(DISTINCT substr(value, 5))
-                      keys: substr(value, 5) (type: string)
+                      aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0), sum(DISTINCT _col0), count(DISTINCT _col0)
+                      keys: _col0 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out
index d7f90f1..bbad6e7 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out
@@ -51,12 +51,12 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5))
-                      keys: substr(value, 5) (type: string)
+                      aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0)
+                      keys: _col0 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out
index 75cb50b..6868eff 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out
@@ -50,11 +50,11 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
-                      key expressions: substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string)
                      sort order: +
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
        Reducer 2 
diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out
index 51831db..399bfd8 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out
@@ -54,11 +54,11 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
+                    expressions: substr(value, 5) (type: string)
+                    outputColumnNames: _col0
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
-                      key expressions: substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string)
                      sort order: +
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
        Reducer 2 
diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
index 517e492..8a26e81 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
@@ -114,12 +114,12 @@ STAGE PLANS:
                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: count(DISTINCT substr(value, 5)), sum(substr(value, 5))
-                      keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                      aggregations: count(DISTINCT _col1), sum(_col1)
+                      keys: _col0 (type: string), _col1 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3
                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
index d247c25..6005381 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
@@ -131,12 +131,12 @@ STAGE PLANS:
                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: count(DISTINCT substr(value, 5)), sum(substr(value, 5)), sum(DISTINCT substr(value, 5)), count(DISTINCT value)
-                      keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+                      aggregations: count(DISTINCT _col1), sum(_col1), sum(DISTINCT _col1), count(DISTINCT _col2)
+                      keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
index 8bc6105..e00d234 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
@@ -114,13 +114,13 @@ STAGE PLANS:
                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
-                      key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                      key expressions: _col0 (type: string), _col1 (type: string)
                      sort order: ++
-                      Map-reduce partition columns: substr(key, 1, 1) (type: string)
+                      Map-reduce partition columns: _col0 (type: string)
                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                      tag: -1
                      auto parallelism: false
diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
index 40af253..1efa9e7 100644
--- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
@@ -473,11 +473,11 @@ STAGE PLANS:
                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: ctinyint (type: tinyint), cdouble (type: double)
-                    outputColumnNames: ctinyint, cdouble
+                    outputColumnNames: _col0, _col1
                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: count(DISTINCT cdouble)
-                      keys: ctinyint (type: tinyint), cdouble (type: double)
+                      aggregations: count(DISTINCT _col1)
+                      keys: _col0 (type: tinyint), _col1 (type: double)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
@@ -660,11 +660,11 @@ STAGE PLANS:
                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
-                    outputColumnNames: ctinyint, cstring1, cstring2
+                    outputColumnNames: _col0, _col1, _col2
                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2)
-                      keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
+                      aggregations: count(DISTINCT _col1), count(DISTINCT _col2)
+                      keys: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
index 7fa7bdb..fecfe0a 100644
--- a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
@@ -1257,11 +1257,11 @@ STAGE PLANS:
                  Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: ws_order_number (type: int)
-                    outputColumnNames: ws_order_number
+                    outputColumnNames: _col0
                    Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: count(DISTINCT ws_order_number)
-                      keys: ws_order_number (type: int)
+                      aggregations: count(DISTINCT _col0)
+                      keys: _col0 (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
-- 
1.7.12.4 (Apple Git-37)
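
Reviewer note: a minimal standalone sketch of the gate this patch relaxes.
It is not part of the patch and not Hive API; the class name, method name,
and parameters below are illustrative only, mirroring the reworked
hasEnoughJoins test in canHandleQbForCbo(). Before HIVE-10741, a top-level
query block needed more than one join to take the CBO path, so a plain
"select count(distinct key) from src" (join count 0) skipped Calcite and the
count distinct rewrite never ran; distinct expressions now also pass the gate.

public class CboGateSketch {

  // Subqueries (!topLevelQB), multi-join queries, test mode, and now
  // DISTINCT-bearing query blocks all proceed to the CBO path.
  static boolean passesCboGate(boolean topLevelQB, int joinCount,
                               boolean inTest, boolean hasDistinctExprs) {
    return !topLevelQB || joinCount > 1 || inTest || hasDistinctExprs;
  }

  public static void main(String[] args) {
    // select count(distinct key) from src -- now goes through CBO: true
    System.out.println(passesCboGate(true, 0, false, true));
    // select count(key) from src -- still skips CBO at top level: false
    System.out.println(passesCboGate(true, 0, false, false));
  }
}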