From 233e58b148be7514b610f6d3d5570d440e564927 Mon Sep 17 00:00:00 2001
From: Ashutosh Chauhan
Date: Mon, 18 May 2015 11:11:51 -0700
Subject: [PATCH] HIVE-10741 : count distinct rewrite is not firing

---
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  8 +++----
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 28 +++++++++++-----------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 6e6923c..12d5ba1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -378,7 +378,7 @@ boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) {
       }
       // Now check QB in more detail. canHandleQbForCbo returns null if query can
       // be handled.
-      String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage);
+      String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage, qb);
       if (msg == null) {
         return true;
       }
@@ -407,11 +407,11 @@ boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) {
    * 2. Nested Subquery will return false for qbToChk.getIsQuery()
    */
   static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
-      boolean topLevelQB, boolean verbose) {
+      boolean topLevelQB, boolean verbose, QB qb) {
     boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST);
     boolean isStrictTest = isInTest
         && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict");
-    boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest;
+    boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest || distinctExprsExists(qb);
 
     if (!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy()
         && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy()
@@ -2700,7 +2700,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept
 
       // 0. Check if we can handle the SubQuery;
       // canHandleQbForCbo returns null if the query can be handled.
-      String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled());
+      String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled(), qb);
       if (reason != null) {
         String msg = "CBO can not handle Sub Query";
         if (LOG.isDebugEnabled()) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 30c87ad..086d9a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -233,7 +233,7 @@
   private HashMap opToPartPruner;
   private HashMap opToPartList;
   protected HashMap> topOps;
-  private HashMap> topSelOps;
+  private final HashMap> topSelOps;
   protected LinkedHashMap, OpParseContext> opParseCtx;
   private List loadTableWork;
   private List loadFileWork;
@@ -294,7 +294,7 @@
   /** Not thread-safe.
   */
   final ASTSearcher astSearcher = new ASTSearcher();
-  
+
   protected AnalyzeRewriteContext analyzeRewrite;
 
   private CreateTableDesc tableDesc;
@@ -1421,7 +1421,7 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
   /**
    * This is phase1 of supporting specifying schema in insert statement
    * insert into foo(z,y) select a,b from bar;
-   * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode) 
+   * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode)
   * @throws SemanticException
   */
  private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException {
@@ -3880,14 +3880,14 @@ static boolean isRegex(String pattern, HiveConf conf) {
   * create table source (a int, b int);
   * create table target (x int, y int, z int);
   * insert into target(z,x) select * from source
-  * 
+  *
   * Once the * is resolved to 'a,b', this list needs to rewritten to 'b,null,a' so that it looks
   * as if the original query was written as
   * insert into target select b, null, a from source
-  * 
+  *
   * if target schema is not specified, this is no-op
-  * 
-  * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx) 
+  *
+  * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
   * @throws SemanticException
   */
  private void handleInsertStatementSpec(List col_list, String dest,
@@ -3919,7 +3919,7 @@ private void handleInsertStatementSpec(List col_list, String dest,
     Table target = qb.getMetaData().getDestTableForAlias(dest);
     Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
     if(target == null && partition == null) {
-      throw new SemanticException(generateErrorMessage(selExprList, 
+      throw new SemanticException(generateErrorMessage(selExprList,
         "No table/partition found in QB metadata for dest='" + dest + "'"));
     }
     ArrayList new_col_list = new ArrayList();
@@ -8581,7 +8581,7 @@ private Operator genSelectAllDesc(Operator input) throws SemanticException {
     }
     RowResolver outputRR = inputRR.duplicate();
     Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
-        new SelectDesc(colList, columnNames, true), 
+        new SelectDesc(colList, columnNames, true),
         outputRR.getRowSchema(), input), outputRR);
     output.setColumnExprMap(columnExprMap);
     return output;
@@ -8742,7 +8742,7 @@ private boolean matchExprLists(List list1, List list
   }
 
   // see if there are any distinct expressions
-  private boolean distinctExprsExists(QB qb) {
+  protected static boolean distinctExprsExists(QB qb) {
     QBParseInfo qbp = qb.getParseInfo();
 
     TreeSet ks = new TreeSet();
@@ -8997,9 +8997,9 @@ private Operator genUnionPlan(String unionalias, String leftalias,
     if (leftmap.size() != rightmap.size()) {
       throw new SemanticException("Schema of both sides of union should match.");
     }
-    
+
     RowResolver unionoutRR = new RowResolver();
-    
+
     Iterator> lIter = leftmap.entrySet().iterator();
     Iterator> rIter = rightmap.entrySet().iterator();
     while (lIter.hasNext()) {
@@ -9008,7 +9008,7 @@ private Operator genUnionPlan(String unionalias, String leftalias,
 
       ColumnInfo lInfo = lEntry.getValue();
       ColumnInfo rInfo = rEntry.getValue();
-      String field = lEntry.getKey(); // use left alias (~mysql, postgresql) 
+      String field = lEntry.getKey(); // use left alias (~mysql, postgresql)
 
       // try widening conversion, otherwise fail union
       TypeInfo commonTypeInfo =
          FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType());
@@ -9158,7 +9158,7 @@ private Operator genUnionPlan(String unionalias, String leftalias,
 
     Iterator oIter = origInputFieldMap.values().iterator();
     Iterator uIter = fieldMap.values().iterator();
-    
+
     List columns = new ArrayList();
     boolean needsCast = false;
     while (oIter.hasNext()) {
-- 
1.7.12.4 (Apple Git-37)
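
Note (not part of the original patch): the change threads the QB into canHandleQbForCbo and adds distinctExprsExists(qb) to the hasEnoughJoins check, so a top-level query block containing distinct aggregate expressions now takes the CBO path even when it has at most one join. Before this, such a query was rejected by canHandleQbForCbo, so the Calcite-based count-distinct rewrite named in the subject never had a chance to fire. A minimal query of the kind this presumably targets (table and column names are illustrative only, not taken from the patch):

  -- single-table aggregate, no joins: previously bypassed CBO
  SELECT count(DISTINCT key) FROM src;

With the patch, canHandleQbForCbo should return null for such a query block (assuming none of the other disqualifying clauses are present), letting Calcite plan it and apply the distinct-aggregate rewrite.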