/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec;

import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;

import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.antlr.runtime.TokenRewriteStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.JoinType;
import org.apache.hadoop.hive.ql.parse.QB;
import org.apache.hadoop.hive.ql.parse.QBSubQuery;
import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic;
import org.apache.hadoop.hive.ql.plan.ExplainSQRewriteWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.StringUtils;

/**
 * Task behind {@code EXPLAIN SUBQUERY REWRITE <query>}.
 *
 * Replays the SubQuery-to-Join rewrite recorded by
 * {@link SubQueryDiagnostic.QBSubQueryRewrite} on top of the original token
 * stream and prints the resulting, human-readable rewritten query to the
 * result file. The top-level QB carries at most one WHERE-clause and one
 * HAVING-clause SubQuery predicate (see QB), and both are rendered here.
 */
public class ExplainSQRewriteTask extends Task<ExplainSQRewriteWork> implements Serializable {
  private static final long serialVersionUID = 1L;

  @Override
  public StageType getType() {
    return StageType.EXPLAIN;
  }

  /**
   * Writes the rewritten query text to {@code work.getResFile()}.
   *
   * @return 0 on success, 1 on any failure (the exception is printed to the
   *         session console, mirroring other Task implementations).
   */
  @Override
  public int execute(DriverContext driverContext) {

    PrintStream out = null;
    try {
      Path resFile = new Path(work.getResFile());
      OutputStream outS = resFile.getFileSystem(conf).create(resFile);
      out = new PrintStream(outS);

      QB qb = work.getQb();
      QBSubQuery sqW = qb.getWhereClauseSubQueryPredicate();
      QBSubQuery sqH = qb.getHavingClauseSubQueryPredicate();
      TokenRewriteStream stream = work.getCtx().getTokenRewriteStream();
      String program = "sq rewrite";
      ASTNode ast = work.getAst();

      try {
        // Render the WHERE-clause SubQuery rewrite (if any) and, previously
        // fetched but never used, the HAVING-clause SubQuery rewrite too.
        if (sqW != null) {
          addRewrites(stream, sqW, program, HiveParser.TOK_WHERE);
        }
        if (sqH != null) {
          addRewrites(stream, sqH, program, HiveParser.TOK_HAVING);
        }
        out.println(stream.toString(program,
            ast.getTokenStartIndex(), ast.getTokenStopIndex()));
      } finally {
        // Always drop the rewrite program so the shared token stream is left
        // clean for any later consumer.
        stream.deleteProgram(program);
      }

      out.close();
      out = null;
      return (0);
    } catch (Exception e) {
      console.printError("Failed with exception " + e.getMessage(),
          "\n" + StringUtils.stringifyException(e));
      return (1);
    } finally {
      IOUtils.closeStream(out);
    }
  }

  /**
   * Kept for compatibility: applies the WHERE-clause rewrite only.
   */
  void addWhereRewrites(TokenRewriteStream stream, QBSubQuery sq, String program) {
    addRewrites(stream, sq, program, HiveParser.TOK_WHERE);
  }

  /**
   * Rewrites one SubQuery predicate in the token-rewrite {@code program}:
   * <ul>
   *   <li>the original SubQuery expression is replaced by {@code 1 = 1};</li>
   *   <li>the rewritten SubQuery is appended to the FROM clause as a join,
   *       together with its joining condition;</li>
   *   <li>any post-join condition (e.g. the IS NULL check for NOT IN) is
   *       appended to the clause identified by {@code clauseTokenType}
   *       (TOK_WHERE or TOK_HAVING).</li>
   * </ul>
   */
  private void addRewrites(TokenRewriteStream stream, QBSubQuery sq, String program,
      int clauseTokenType) {
    ASTNode tokQry = work.getAst();
    ASTNode tokFrom = (ASTNode) tokQry.getChild(0);
    ASTNode tokInsert = (ASTNode) tokQry.getChild(1);
    ASTNode tokClause = null;

    for (int i = 0; i < tokInsert.getChildCount(); i++) {
      if (tokInsert.getChild(i).getType() == clauseTokenType) {
        tokClause = (ASTNode) tokInsert.getChild(i);
        break;
      }
    }

    if (sq == null) {
      return;
    }

    SubQueryDiagnostic.QBSubQueryRewrite diag = sq.getDiagnostic();
    String sqStr = diag.getRewrittenQuery();
    String joinCond = diag.getJoiningCondition();

    stream.replace(program, sq.getOriginalSubQueryAST().getTokenStartIndex(),
        sq.getOriginalSubQueryAST().getTokenStopIndex(),
        "1 = 1");

    String sqJoin = " "
        + getJoinKeyWord(sq)
        + " "
        + sqStr
        + " "
        + joinCond;

    stream.insertAfter(program, tokFrom.getTokenStopIndex(), sqJoin);

    String postJoinCond = diag.getOuterQueryPostJoinCond();
    // Guard: the clause node may legitimately be absent; previously this
    // dereferenced tokWhere unconditionally and could NPE.
    if (postJoinCond != null && tokClause != null) {
      stream.insertAfter(program, tokClause.getTokenStopIndex(), " and " + postJoinCond);
    }
  }

  /** Maps the SubQuery's join type to its SQL keyword for display. */
  private String getJoinKeyWord(QBSubQuery sq) {
    switch (sq.getJoinType()) {
    case LEFTOUTER:
      return "left outer join";
    case LEFTSEMI:
      return "left semi join";
    case RIGHTOUTER:
      return "right outer join";
    case FULLOUTER:
      return "full outer join";
    case INNER:
    default:
      return "inner join";
    }
  }

  @Override
  public String getName() {
    return "EXPLAIN REWRITE";
  }

  /** Single string column, same shape as EXPLAIN output. */
  @Override
  public List<FieldSchema> getResultSchema() {
    FieldSchema tmpFieldSchema = new FieldSchema();
    List<FieldSchema> colList = new ArrayList<FieldSchema>();

    tmpFieldSchema.setName(ExplainTask.EXPL_COLUMN_NAME);
    tmpFieldSchema.setType(STRING_TYPE_NAME);

    colList.add(tmpFieldSchema);
    return colList;
  }
}
ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java index d0807d2..e60963b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.plan.CopyWork; import org.apache.hadoop.hive.ql.plan.DDLWork; import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork; +import org.apache.hadoop.hive.ql.plan.ExplainSQRewriteWork; import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FunctionWork; @@ -73,6 +74,8 @@ public taskTuple(Class workClass, Class> taskClass) { FunctionTask.class)); taskvec .add(new taskTuple(ExplainWork.class, ExplainTask.class)); + taskvec + .add(new taskTuple(ExplainSQRewriteWork.class, ExplainSQRewriteTask.class)); taskvec.add(new taskTuple(ConditionalWork.class, ConditionalTask.class)); taskvec.add(new taskTuple(MapredWork.class, diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSQRewriteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSQRewriteSemanticAnalyzer.java new file mode 100644 index 0000000..efab36e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSQRewriteSemanticAnalyzer.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.hive.ql.parse; + +import java.io.Serializable; +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.plan.ExplainSQRewriteWork; + +public class ExplainSQRewriteSemanticAnalyzer extends BaseSemanticAnalyzer { + List fieldList; + + public ExplainSQRewriteSemanticAnalyzer(HiveConf conf) throws SemanticException { + super(conf); + } + + @SuppressWarnings("unchecked") + @Override + public void analyzeInternal(ASTNode ast) throws SemanticException { + + + ctx.setExplain(true); + + // Create a semantic analyzer for the query + ASTNode input = (ASTNode) ast.getChild(0); + SemanticAnalyzer sem = (SemanticAnalyzer) + SemanticAnalyzerFactory.get(conf, input); + sem.analyze(input, ctx); + sem.validate(); + + ctx.setResFile(new Path(ctx.getLocalTmpFileURI())); + List> tasks = sem.getRootTasks(); + if (tasks == null) { + tasks = Collections.emptyList(); + } + + ExplainSQRewriteWork work = new ExplainSQRewriteWork(ctx.getResFile().toString(), + sem.getQB(), + input, + ctx + ); + + Task explTask = TaskFactory.get(work, conf); + + fieldList = explTask.getResultSchema(); + rootTasks.add(explTask); + } + + @Override + public List getResultSchema() { + return fieldList; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g 
ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 366b714..0c71989 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -263,6 +263,8 @@ KW_USER: 'USER'; KW_ROLE: 'ROLE'; KW_INNER: 'INNER'; KW_EXCHANGE: 'EXCHANGE'; +KW_SUBQUERY : 'SUBQUERY'; +KW_REWRITE : 'REWRITE'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index b146df6..276fa46 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -222,6 +222,7 @@ TOK_ALTERVIEW_DROPPARTS; TOK_ALTERVIEW_RENAME; TOK_VIEWPARTCOLS; TOK_EXPLAIN; +TOK_EXPLAIN_SQ_REWRITE; TOK_TABLESERIALIZER; TOK_TABLEPROPERTIES; TOK_TABLEPROPLIST; @@ -442,6 +443,8 @@ import java.util.HashMap; xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES"); xlateMap.put("KW_VALUE_TYPE", "\$VALUE\$"); xlateMap.put("KW_ELEM_TYPE", "\$ELEM\$"); + xlateMap.put("KW_SUBQUERY", "SUBQUERY"); + xlateMap.put("KW_REWRITE", "REWRITE"); // Operators xlateMap.put("DOT", "."); @@ -568,8 +571,11 @@ statement explainStatement @init { msgs.push("explain statement"); } @after { msgs.pop(); } - : KW_EXPLAIN (explainOptions=KW_EXTENDED|explainOptions=KW_FORMATTED|explainOptions=KW_DEPENDENCY|explainOptions=KW_LOGICAL)? execStatement - -> ^(TOK_EXPLAIN execStatement $explainOptions?) + : KW_EXPLAIN + ( (explainOptions=KW_EXTENDED|explainOptions=KW_FORMATTED|explainOptions=KW_DEPENDENCY|explainOptions=KW_LOGICAL)? execStatement + -> ^(TOK_EXPLAIN execStatement $explainOptions?) 
| + KW_SUBQUERY KW_REWRITE queryStatementExpression -> ^(TOK_EXPLAIN_SQ_REWRITE queryStatementExpression) + ) ; execStatement diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 8cf5ad6..e5ce53c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -535,5 +535,5 @@ identifier nonReserved : - KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | 
KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER + KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS 
| KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER | KW_SUBQUERY | KW_REWRITE ; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index 3fbe8e2..326457c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -76,7 +76,16 @@ * clause. */ private int numSubQueryPredicates; - + + /* + * for now a top level QB can have 1 where clause SQ predicate. 
+ */ + private QBSubQuery whereClauseSubQueryPredicate; + + /* + * for now a top level QB can have 1 where clause SQ predicate. + */ + private QBSubQuery havingClauseSubQueryPredicate; // results @@ -336,5 +345,21 @@ protected int getNumSubQueryPredicates() { protected int incrNumSubQueryPredicates() { return ++numSubQueryPredicates; } + + void setWhereClauseSubQueryPredicate(QBSubQuery sq) { + whereClauseSubQueryPredicate = sq; + } + + public QBSubQuery getWhereClauseSubQueryPredicate() { + return whereClauseSubQueryPredicate; + } + + void setHavingClauseSubQueryPredicate(QBSubQuery sq) { + havingClauseSubQueryPredicate = sq; + } + + public QBSubQuery getHavingClauseSubQueryPredicate() { + return havingClauseSubQueryPredicate; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index b9c7e6f..23f7a00 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -16,6 +16,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic.QBSubQueryRewrite; public class QBSubQuery implements ISubQueryJoinInfo { @@ -435,6 +436,8 @@ void setSQRR(RowResolver sqRR) { private int numOuterCorrExprsForHaving; private NotInCheck notInCheck; + + private QBSubQueryRewrite subQueryDiagnostic; public QBSubQuery(String outerQueryId, int sqIdx, @@ -461,6 +464,8 @@ public QBSubQuery(String outerQueryId, if ( operator.getType() == SubQueryType.NOT_IN ) { notInCheck = new NotInCheck(); } + + subQueryDiagnostic = new QBSubQueryRewrite(this, ctx.getTokenRewriteStream()); } public ASTNode getSubQueryAST() { @@ -472,6 +477,10 @@ public ASTNode getOuterQueryExpression() { public SubQueryTypeDef getOperator() { return operator; } + + public ASTNode 
getOriginalSubQueryAST() { + return originalSQASTOrigin.getUsageNode(); + } void validateAndRewriteAST(RowResolver outerQueryRR, boolean forHavingClause, @@ -587,6 +596,7 @@ void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, rewriteCorrConjunctForHaving(parentQueryJoinCond, true, outerQueryAlias, outerQueryRR, outerQueryCol); } + subQueryDiagnostic.addJoinCondition(parentQueryJoinCond, outerQueryCol != null, true); } joinConditionAST = SubQueryUtils.andAST(parentQueryJoinCond, joinConditionAST); setJoinType(); @@ -606,7 +616,11 @@ void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, ASTNode updateOuterQueryFilter(ASTNode outerQryFilter) { if (postJoinConditionAST == null ) { return outerQryFilter; - } else if ( outerQryFilter == null ) { + } + + subQueryDiagnostic.addPostJoinCondition(postJoinConditionAST); + + if ( outerQryFilter == null ) { return postJoinConditionAST; } ASTNode node = SubQueryUtils.andAST(outerQryFilter, postJoinConditionAST); @@ -722,9 +736,13 @@ private void rewrite(RowResolver parentQueryRR, String exprAlias = getNextCorrExprAlias(); ASTNode sqExprAlias = SubQueryUtils.createAliasAST(exprAlias); ASTNode sqExprForCorr = SubQueryUtils.createColRefAST(alias, exprAlias); + boolean corrCondLeftIsRewritten = false; + boolean corrCondRightIsRewritten = false; if ( conjunct.getLeftExprType().refersSubQuery() ) { + corrCondLeftIsRewritten = true; if ( forHavingClause && conjunct.getRightOuterColInfo() != null ) { + corrCondRightIsRewritten = true; rewriteCorrConjunctForHaving(conjunctAST, false, outerQueryAlias, parentQueryRR, conjunct.getRightOuterColInfo()); } @@ -734,16 +752,21 @@ private void rewrite(RowResolver parentQueryRR, subQueryJoinAliasExprs.add(sqExprForCorr); ASTNode selExpr = SubQueryUtils.createSelectItem(conjunct.getLeftExpr(), sqExprAlias); selectClause.addChild(selExpr); + subQueryDiagnostic.addSelectClauseRewrite(conjunct.getLeftExpr(), exprAlias); numOfCorrelationExprsAddedToSQSelect++; if ( 
containsAggregationExprs ) { ASTNode gBy = getSubQueryGroupByAST(); SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getLeftExpr()); + subQueryDiagnostic.addGByClauseRewrite(conjunct.getLeftExpr()); } if ( notInCheck != null ) { notInCheck.addCorrExpr((ASTNode)conjunctAST.getChild(0)); } + subQueryDiagnostic.addJoinCondition(conjunctAST, corrCondLeftIsRewritten, corrCondRightIsRewritten); } else { + corrCondRightIsRewritten = true; if ( forHavingClause && conjunct.getLeftOuterColInfo() != null ) { + corrCondLeftIsRewritten = true; rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias, parentQueryRR, conjunct.getLeftOuterColInfo()); } @@ -753,17 +776,21 @@ private void rewrite(RowResolver parentQueryRR, subQueryJoinAliasExprs.add(sqExprForCorr); ASTNode selExpr = SubQueryUtils.createSelectItem(conjunct.getRightExpr(), sqExprAlias); selectClause.addChild(selExpr); + subQueryDiagnostic.addSelectClauseRewrite(conjunct.getRightExpr(), exprAlias); numOfCorrelationExprsAddedToSQSelect++; if ( containsAggregationExprs ) { ASTNode gBy = getSubQueryGroupByAST(); SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getRightExpr()); + subQueryDiagnostic.addGByClauseRewrite(conjunct.getRightExpr()); } if ( notInCheck != null ) { notInCheck.addCorrExpr((ASTNode)conjunctAST.getChild(1)); } + subQueryDiagnostic.addJoinCondition(conjunctAST, corrCondLeftIsRewritten, corrCondRightIsRewritten); } } else { sqNewSearchCond = SubQueryUtils.andAST(sqNewSearchCond, conjunctAST); + subQueryDiagnostic.addWhereClauseRewrite(conjunctAST); } } @@ -775,6 +802,7 @@ private void rewrite(RowResolver parentQueryRR, * left. 
*/ sqNewSearchCond = SubQueryUtils.constructTrueCond(); + subQueryDiagnostic.addWhereClauseRewrite("1 = 1"); } whereClause.setChild(0, sqNewSearchCond); } @@ -811,6 +839,8 @@ private ASTNode getSubQueryGroupByAST() { for(ASTNode child : newChildren ) { subQueryAST.addChild(child); } + + subQueryDiagnostic.setAddGroupByClause(); return groupBy; } @@ -836,6 +866,11 @@ public int getNumOfCorrelationExprsAddedToSQSelect() { return numOfCorrelationExprsAddedToSQSelect; } + + public QBSubQueryRewrite getDiagnostic() { + return subQueryDiagnostic; + } + public QBSubQuery getSubQuery() { return this; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index ace1df9..ea3ff9f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1999,7 +1999,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, /* * Clone the Search AST; apply all rewrites on the clone. 
*/ - ASTNode clonedSearchCond = (ASTNode) ParseDriver.adaptor.dupTree(searchCond); + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); for(int i=0; i < subQueries.size(); i++) { @@ -2011,6 +2011,11 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, originalSubQueryAST, ctx); + if ( !forHavingClause ) { + qb.setWhereClauseSubQueryPredicate(subQuery); + } else { + qb.setHavingClauseSubQueryPredicate(subQuery); + } String havingInputAlias = null; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java index 542d59a..eb0e737 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java @@ -146,6 +146,8 @@ public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree) switch (tree.getToken().getType()) { case HiveParser.TOK_EXPLAIN: return new ExplainSemanticAnalyzer(conf); + case HiveParser.TOK_EXPLAIN_SQ_REWRITE: + return new ExplainSQRewriteSemanticAnalyzer(conf); case HiveParser.TOK_LOAD: return new LoadSemanticAnalyzer(conf); case HiveParser.TOK_EXPORT: diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java new file mode 100644 index 0000000..65ff763 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java @@ -0,0 +1,195 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import org.antlr.runtime.TokenRewriteStream; + + +/* + * Contains functionality that helps with understanding how a SubQuery was rewritten. + */ +public class SubQueryDiagnostic { + + public static class QBSubQueryRewrite { + QBSubQuery subQuery; + TokenRewriteStream stream; + + /* + * the rewritten where Clause + */ + String whereClause; + + /* + * any additions to the SubQueries Select Clause. + */ + String selectClauseAdditions; + + /* + * additions to the Group By Clause. 
+ */ + String gByClauseAdditions; + boolean addGroupByClause; + + String joiningCondition; + + String outerQueryPostJoinCond; + + + QBSubQueryRewrite(QBSubQuery subQuery, + TokenRewriteStream stream) { + this.subQuery = subQuery; + this.stream = stream; + } + + public String getRewrittenQuery() { + + ASTNode sqAST = subQuery.getSubQueryAST(); + + if (whereClause != null) { + ASTNode whereAST = (ASTNode) sqAST.getChild(1).getChild(2); + stream.replace(subQuery.getAlias(), + whereAST.getTokenStartIndex(), + whereAST.getTokenStopIndex(), + whereClause); + } + + if (selectClauseAdditions != null) { + ASTNode selectClause = (ASTNode) sqAST.getChild(1).getChild(1); + stream.insertAfter(subQuery.getAlias(), + selectClause.getTokenStopIndex(), selectClauseAdditions); + } + + if (gByClauseAdditions != null) { + if (!addGroupByClause) { + ASTNode groupBy = (ASTNode) sqAST.getChild(1).getChild(3); + stream.insertAfter(subQuery.getAlias(), + groupBy.getTokenStopIndex(), gByClauseAdditions); + } + else { + gByClauseAdditions = " group by " + gByClauseAdditions; + stream.insertAfter(subQuery.getAlias(), + sqAST.getTokenStopIndex() - 1, gByClauseAdditions); + } + } + + try { + return + stream.toString(subQuery.getAlias(), + sqAST.getTokenStartIndex(), + sqAST.getTokenStopIndex()) + + " " + subQuery.getAlias(); + } finally { + stream.deleteProgram(subQuery.getAlias()); + } + } + + public String getJoiningCondition() { + return joiningCondition; + } + + void addWhereClauseRewrite(ASTNode predicate) { + String cond = stream.toString(predicate.getTokenStartIndex(), predicate.getTokenStopIndex()); + addWhereClauseRewrite(cond); + } + + void addWhereClauseRewrite(String cond) { + whereClause = whereClause == null ? 
"where " : whereClause + " and "; + whereClause += cond; + } + + void addSelectClauseRewrite(ASTNode selectExpr, String alias) { + if ( selectClauseAdditions == null ) { + selectClauseAdditions = ""; + } + + selectClauseAdditions += ", " + + stream.toString(selectExpr.getTokenStartIndex(), selectExpr.getTokenStopIndex()) + + " as " + alias; + } + + void setAddGroupByClause() { + this.addGroupByClause = true; + } + + + void addGByClauseRewrite(ASTNode selectExpr) { + if ( gByClauseAdditions == null ) { + gByClauseAdditions = ""; + } + + if ( !addGroupByClause || !gByClauseAdditions.equals("") ) { + gByClauseAdditions += ", "; + } + + gByClauseAdditions += stream.toString( + selectExpr.getTokenStartIndex(), + selectExpr.getTokenStopIndex()); + } + + /* + * joinCond represents a correlated predicate. + * leftIsRewritten, rightIsRewritten indicates if either side has been replaced by a column alias. + * + * If a side is not rewritten, we get its text from the tokenstream. + * For rewritten conditions we form the text based on the table and column reference. + */ + void addJoinCondition(ASTNode joinCond, boolean leftIsRewritten, boolean rightIsRewritten) { + StringBuilder b = new StringBuilder(); + + if ( joiningCondition == null ) { + joiningCondition = " on "; + } else { + b.append(" and "); + } + addCondition(b, (ASTNode) joinCond.getChild(0), leftIsRewritten); + b.append(" = "); + addCondition(b, (ASTNode) joinCond.getChild(1), rightIsRewritten); + + joiningCondition += b.toString(); + } + + private void addCondition(StringBuilder b, ASTNode cond, boolean rewritten) { + if ( !rewritten ) { + b.append(stream.toString(cond.getTokenStartIndex(), cond.getTokenStopIndex())); + } else { + addReference(b, cond); + } + } + + private void addReference(StringBuilder b, ASTNode ref) { + if ( ref.getType() == HiveParser.DOT ) { + b.append(ref.getChild(0).getChild(0).getText()). + append("."). 
+ append(ref.getChild(1).getText()); + } else { + b.append(ref.getText()); + } + } + + void addPostJoinCondition(ASTNode cond) { + StringBuilder b = new StringBuilder(); + addReference(b, (ASTNode) cond.getChild(1)); + outerQueryPostJoinCond = b.toString() + " is null"; + } + + public String getOuterQueryPostJoinCond() { + return outerQueryPostJoinCond; + } + } + +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java index 8c03c7d..1474307 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java @@ -4,6 +4,7 @@ import java.util.List; import java.util.Map; +import org.antlr.runtime.tree.CommonTreeAdaptor; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -586,6 +587,12 @@ static ASTNode buildNotInNullJoinCond(String subqueryAlias, String cntAlias) { public String getOuterQueryId(); }; + + /* + * Using CommonTreeAdaptor because the Adaptor in ParseDriver doesn't carry + * the token indexes when duplicating a Tree. + */ + static final CommonTreeAdaptor adaptor = new CommonTreeAdaptor(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainSQRewriteWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainSQRewriteWork.java new file mode 100644 index 0000000..a7a4054 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainSQRewriteWork.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; + +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.QB; + +public class ExplainSQRewriteWork implements Serializable { + private static final long serialVersionUID = 1L; + + private String resFile; + private QB qb; + private ASTNode ast; + private Context ctx; + + + public ExplainSQRewriteWork() { + } + + public ExplainSQRewriteWork(String resFile, QB qb, ASTNode ast, Context ctx) { + this.resFile = resFile; + this.qb = qb; + this.ast = ast; + this.ctx = ctx; + } + + public String getResFile() { + return resFile; + } + + public QB getQb() { + return qb; + } + + public ASTNode getAst() { + return ast; + } + + public Context getCtx() { + return ctx; + } + +}