diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
new file mode 100644
index 0000000..fb72305
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
@@ -0,0 +1,237 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
+
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.antlr.runtime.TokenRewriteStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.JoinType;
+import org.apache.hadoop.hive.ql.parse.QB;
+import org.apache.hadoop.hive.ql.parse.QBSubQuery;
+import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic;
+import org.apache.hadoop.hive.ql.plan.ExplainSQRewriteWork;
+import org.apache.hadoop.hive.ql.plan.api.StageType;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.StringUtils;
+
+
+/**
+ * Task behind <code>EXPLAIN SUBQUERY REWRITE</code>.
+ *
+ * For each where/having clause SubQuery predicate recorded on the Query Blocks
+ * it prints the rewritten SubQuery and its joining condition, followed by the
+ * outer query text with those predicates replaced by joins.
+ */
+public class ExplainSQRewriteTask extends Task<ExplainSQRewriteWork> implements Serializable {
+  private static final long serialVersionUID = 1L;
+
+  @Override
+  public StageType getType() {
+    return StageType.EXPLAIN;
+  }
+
+  /**
+   * Writes the rewrite diagnostics to the result file named by the work.
+   *
+   * @return 0 on success, 1 if an exception was raised (it is reported on the console)
+   */
+  @Override
+  public int execute(DriverContext driverContext) {
+
+    PrintStream out = null;
+    try {
+      Path resFile = new Path(work.getResFile());
+      OutputStream outS = resFile.getFileSystem(conf).create(resFile);
+      out = new PrintStream(outS);
+
+      QB qb = work.getQb();
+      TokenRewriteStream stream = work.getCtx().getTokenRewriteStream();
+      String program = "sq rewrite";
+      ASTNode ast = work.getAst();
+
+      try {
+        addRewrites(stream, qb, program, out);
+        out.println("\nRewritten Query:\n" + stream.toString(program,
+            ast.getTokenStartIndex(), ast.getTokenStopIndex()));
+      } finally {
+        // the rewrite program is only needed to render the text above
+        stream.deleteProgram(program);
+      }
+
+      out.close();
+      out = null;
+      return (0);
+    }
+    catch (Exception e) {
+      console.printError("Failed with exception " + e.getMessage(),
+          "\n" + StringUtils.stringifyException(e));
+      return (1);
+    }
+    finally {
+      IOUtils.closeStream(out);
+    }
+  }
+
+  /**
+   * Registers the rewrites for the SubQuery predicates of a Query Block and,
+   * recursively, of the Query Blocks of its from clause SubQueries.
+   */
+  void addRewrites(TokenRewriteStream stream, QB qb, String program,
+      PrintStream out) {
+    QBSubQuery sqW = qb.getWhereClauseSubQueryPredicate();
+    QBSubQuery sqH = qb.getHavingClauseSubQueryPredicate();
+
+    if (sqW != null || sqH != null) {
+
+      ASTNode sqNode = sqW != null ? sqW.getOriginalSubQueryASTForRewrite()
+          : sqH.getOriginalSubQueryASTForRewrite();
+      ASTNode tokQry = getQueryASTNode(sqNode);
+      ASTNode tokFrom = (ASTNode) tokQry.getChild(0);
+
+      StringBuilder addedJoins = new StringBuilder();
+
+      if (sqW != null) {
+        addRewrites(stream, sqW, program, out, qb.getId(), true, addedJoins);
+      }
+
+      if (sqH != null) {
+        addRewrites(stream, sqH, program, out, qb.getId(), false, addedJoins);
+      }
+      // the joins that replace the predicates go right after the from clause
+      stream.insertAfter(program, tokFrom.getTokenStopIndex(), addedJoins);
+    }
+
+    Set<String> sqAliases = qb.getSubqAliases();
+    for(String sqAlias : sqAliases) {
+      addRewrites(stream, qb.getSubqForAlias(sqAlias).getQB(), program, out);
+    }
+  }
+
+  /**
+   * Registers the rewrite of one SubQuery predicate and prints its diagnostics.
+   *
+   * The predicate text is replaced by "1 = 1" and the equivalent join is
+   * appended to addedJoins.
+   */
+  void addRewrites(TokenRewriteStream stream, QBSubQuery sq, String program,
+      PrintStream out, String qbAlias, boolean isWhere, StringBuilder addedJoins) {
+    ASTNode sqNode = sq.getOriginalSubQueryASTForRewrite();
+    ASTNode tokQry = getQueryASTNode(sqNode);
+    ASTNode tokInsert = (ASTNode) tokQry.getChild(1);
+    ASTNode tokWhere = null;
+
+    for(int i=0; i < tokInsert.getChildCount(); i++) {
+      if ( tokInsert.getChild(i).getType() == HiveParser.TOK_WHERE) {
+        tokWhere = (ASTNode) tokInsert.getChild(i);
+        break;
+      }
+    }
+
+    SubQueryDiagnostic.QBSubQueryRewrite diag = sq.getDiagnostic();
+    /*
+     * Render both texts once and reuse them for the join text and for the
+     * diagnostics printed below, so the two always agree.
+     */
+    String sqStr = diag.getRewrittenQuery();
+    String joinCond = diag.getJoiningCondition();
+
+    stream.replace(program, sqNode.getTokenStartIndex(),
+        sqNode.getTokenStopIndex(),
+        "1 = 1");
+
+    String sqJoin = " " +
+        getJoinKeyWord(sq) +
+        " " +
+        sqStr +
+        " " +
+        joinCond;
+    addedJoins.append(" ").append(sqJoin);
+
+    String postJoinCond = diag.getOuterQueryPostJoinCond();
+    if ( postJoinCond != null ) {
+      if ( tokWhere != null ) {
+        stream.insertAfter(program, tokWhere.getTokenStopIndex(), " and " + postJoinCond);
+      } else {
+        // no where clause to extend (SubQuery in a having clause): start one after the joins
+        addedJoins.append(" where ").append(postJoinCond);
+      }
+    }
+
+    String qualifier = isWhere ? "Where Clause " : "Having Clause ";
+    if ( qbAlias != null ) {
+      qualifier = qualifier + "for Query Block '" + qbAlias + "' ";
+    }
+    out.println(String.format("\n%s Rewritten SubQuery:\n%s",
+        qualifier, sqStr));
+    out.println(String.format("\n%s SubQuery Joining Condition:\n%s",
+        qualifier, joinCond));
+  }
+
+  private String getJoinKeyWord(QBSubQuery sq) {
+    switch (sq.getJoinType()) {
+    case LEFTOUTER:
+      return "left outer join";
+    case LEFTSEMI:
+      return "left semi join";
+    case RIGHTOUTER:
+      return "right outer join";
+    case FULLOUTER:
+      return "full outer join";
+    case INNER:
+    default:
+      return "inner join";
+    }
+  }
+
+  /** Walks up from node to the closest enclosing TOK_QUERY; null if there is none. */
+  private ASTNode getQueryASTNode(ASTNode node) {
+    while( node != null && node.getType() != HiveParser.TOK_QUERY ) {
+      node = (ASTNode) node.getParent();
+    }
+    return node;
+  }
+
+  @Override
+  public String getName() {
+    return "EXPLAIN REWRITE";
+  }
+
+  @Override
+  public List<FieldSchema> getResultSchema() {
+    FieldSchema tmpFieldSchema = new FieldSchema();
+    List<FieldSchema> colList = new ArrayList<FieldSchema>();
+
+    tmpFieldSchema.setName(ExplainTask.EXPL_COLUMN_NAME);
+    tmpFieldSchema.setType(STRING_TYPE_NAME);
+
+    colList.add(tmpFieldSchema);
+    return colList;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java index 679c6ec..ad6e19c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.plan.CopyWork; import org.apache.hadoop.hive.ql.plan.DDLWork; import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork; +import org.apache.hadoop.hive.ql.plan.ExplainSQRewriteWork; import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.plan.FetchWork; import 
org.apache.hadoop.hive.ql.plan.FunctionWork; @@ -78,6 +79,8 @@ public TaskTuple(Class workClass, Class> taskClass) { FunctionTask.class)); taskvec .add(new TaskTuple(ExplainWork.class, ExplainTask.class)); + taskvec + .add(new TaskTuple(ExplainSQRewriteWork.class, ExplainSQRewriteTask.class)); taskvec.add(new TaskTuple(ConditionalWork.class, ConditionalTask.class)); taskvec.add(new TaskTuple(MapredWork.class,
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSQRewriteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSQRewriteSemanticAnalyzer.java
new file mode 100644
index 0000000..d587dc0
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSQRewriteSemanticAnalyzer.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.plan.ExplainSQRewriteWork;
+
+/**
+ * Semantic analyzer for <code>EXPLAIN SUBQUERY REWRITE</code>: analyzes the wrapped
+ * query and schedules a single task that reports how its SubQueries were rewritten.
+ */
+public class ExplainSQRewriteSemanticAnalyzer extends BaseSemanticAnalyzer {
+  List<FieldSchema> fieldList;
+
+  public ExplainSQRewriteSemanticAnalyzer(HiveConf conf) throws SemanticException {
+    super(conf);
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public void analyzeInternal(ASTNode ast) throws SemanticException {
+
+    // explain mode is what makes the SubQuery rewrite diagnostics get recorded
+    ctx.setExplain(true);
+
+    // Create a semantic analyzer for the query
+    ASTNode input = (ASTNode) ast.getChild(0);
+    SemanticAnalyzer sem = (SemanticAnalyzer)
+        SemanticAnalyzerFactory.get(conf, input);
+    sem.analyze(input, ctx);
+    sem.validate();
+
+    ctx.setResFile(ctx.getLocalTmpPath());
+
+    ExplainSQRewriteWork work = new ExplainSQRewriteWork(ctx.getResFile().toString(),
+        sem.getQB(), input, ctx);
+
+    Task<? extends Serializable> explTask = TaskFactory.get(work, conf);
+
+    fieldList = explTask.getResultSchema();
+    rootTasks.add(explTask);
+  }
+
+  @Override
+  public List<FieldSchema> getResultSchema() {
+    return fieldList;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 3e673ca..1720861 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -294,6 +294,8 @@ KW_PRINCIPALS: 'PRINCIPALS'; KW_COMPACT: 'COMPACT'; KW_COMPACTIONS: 'COMPACTIONS'; KW_TRANSACTIONS: 'TRANSACTIONS'; +KW_SUBQUERY : 'SUBQUERY'; +KW_REWRITE : 'REWRITE'; // Operators // NOTE: if you add a new function/operator, add it to 
sysFuncNames so that describe function _FUNC_ will work. diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 13bbf0a..5df2658 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -226,6 +226,7 @@ TOK_ALTERVIEW_DROPPARTS; TOK_ALTERVIEW_RENAME; TOK_VIEWPARTCOLS; TOK_EXPLAIN; +TOK_EXPLAIN_SQ_REWRITE; TOK_TABLESERIALIZER; TOK_TABLEPROPERTIES; TOK_TABLEPROPLIST; @@ -467,6 +468,8 @@ import java.util.HashMap; xlateMap.put("KW_VALUE_TYPE", "\$VALUE\$"); xlateMap.put("KW_ELEM_TYPE", "\$ELEM\$"); xlateMap.put("KW_DEFINED", "DEFINED"); + xlateMap.put("KW_SUBQUERY", "SUBQUERY"); + xlateMap.put("KW_REWRITE", "REWRITE"); // Operators xlateMap.put("DOT", "."); @@ -616,8 +619,11 @@ statement explainStatement @init { pushMsg("explain statement", state); } @after { popMsg(state); } - : KW_EXPLAIN (explainOptions=KW_EXTENDED|explainOptions=KW_FORMATTED|explainOptions=KW_DEPENDENCY|explainOptions=KW_LOGICAL)? execStatement - -> ^(TOK_EXPLAIN execStatement $explainOptions?) + : KW_EXPLAIN + ( (explainOptions=KW_EXTENDED|explainOptions=KW_FORMATTED|explainOptions=KW_DEPENDENCY|explainOptions=KW_LOGICAL)? execStatement + -> ^(TOK_EXPLAIN execStatement $explainOptions?) 
| + KW_SUBQUERY KW_REWRITE queryStatementExpression[true] -> ^(TOK_EXPLAIN_SQ_REWRITE queryStatementExpression) + ) ; execStatement diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 864e692..f5725fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -538,5 +538,5 @@ functionIdentifier nonReserved : - KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_PARQUETFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | 
KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS + KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | 
KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_PARQUETFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_SUBQUERY | KW_REWRITE ; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java 
ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index a8b436e..908db1e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -76,7 +76,16 @@ * clause. */ private int numSubQueryPredicates; - + + /* + * for now a top level QB can have 1 where clause SQ predicate. + */ + private QBSubQuery whereClauseSubQueryPredicate; + + /* + * for now a top level QB can have 1 having clause SQ predicate. + */ + private QBSubQuery havingClauseSubQueryPredicate; // results @@ -340,5 +349,21 @@ protected int getNumSubQueryPredicates() { protected int incrNumSubQueryPredicates() { return ++numSubQueryPredicates; } + + void setWhereClauseSubQueryPredicate(QBSubQuery sq) { + whereClauseSubQueryPredicate = sq; + } + + public QBSubQuery getWhereClauseSubQueryPredicate() { + return whereClauseSubQueryPredicate; + } + + void setHavingClauseSubQueryPredicate(QBSubQuery sq) { + havingClauseSubQueryPredicate = sq; + } + + public QBSubQuery getHavingClauseSubQueryPredicate() { + return havingClauseSubQueryPredicate; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index b7c9e65..d398c88 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic.QBSubQueryRewrite; public class QBSubQuery implements ISubQueryJoinInfo { @@ -458,6 +459,8 @@ void setSQRR(RowResolver sqRR) { private int numOuterCorrExprsForHaving; private NotInCheck notInCheck; + + private QBSubQueryRewrite subQueryDiagnostic; public QBSubQuery(String outerQueryId, int sqIdx, @@ -484,6 +487,8 @@ public QBSubQuery(String outerQueryId, if ( 
operator.getType() == SubQueryType.NOT_IN ) { notInCheck = new NotInCheck(); } + + subQueryDiagnostic = SubQueryDiagnostic.getRewrite(this, ctx.getTokenRewriteStream(), ctx); } public ASTNode getSubQueryAST() { @@ -495,6 +500,13 @@ public ASTNode getOuterQueryExpression() { public SubQueryTypeDef getOperator() { return operator; } + + public ASTNode getOriginalSubQueryASTForRewrite() { + return (operator.getType() == SubQueryType.NOT_EXISTS + || operator.getType() == SubQueryType.NOT_IN ? + (ASTNode) originalSQASTOrigin.getUsageNode().getParent() : + originalSQASTOrigin.getUsageNode()); + } void validateAndRewriteAST(RowResolver outerQueryRR, boolean forHavingClause, @@ -650,6 +662,7 @@ void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, rewriteCorrConjunctForHaving(parentQueryJoinCond, true, outerQueryAlias, outerQueryRR, outerQueryCol); } + subQueryDiagnostic.addJoinCondition(parentQueryJoinCond, outerQueryCol != null, true); } joinConditionAST = SubQueryUtils.andAST(parentQueryJoinCond, joinConditionAST); setJoinType(); @@ -669,7 +682,11 @@ void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, ASTNode updateOuterQueryFilter(ASTNode outerQryFilter) { if (postJoinConditionAST == null ) { return outerQryFilter; - } else if ( outerQryFilter == null ) { + } + + subQueryDiagnostic.addPostJoinCondition(postJoinConditionAST); + + if ( outerQryFilter == null ) { return postJoinConditionAST; } ASTNode node = SubQueryUtils.andAST(outerQryFilter, postJoinConditionAST); @@ -781,9 +798,13 @@ private void rewrite(RowResolver parentQueryRR, String exprAlias = getNextCorrExprAlias(); ASTNode sqExprAlias = SubQueryUtils.createAliasAST(exprAlias); ASTNode sqExprForCorr = SubQueryUtils.createColRefAST(alias, exprAlias); + boolean corrCondLeftIsRewritten = false; + boolean corrCondRightIsRewritten = false; if ( conjunct.getLeftExprType().refersSubQuery() ) { + corrCondLeftIsRewritten = true; if ( forHavingClause && conjunct.getRightOuterColInfo() != 
null ) { + corrCondRightIsRewritten = true; rewriteCorrConjunctForHaving(conjunctAST, false, outerQueryAlias, parentQueryRR, conjunct.getRightOuterColInfo()); } @@ -793,16 +814,21 @@ private void rewrite(RowResolver parentQueryRR, subQueryJoinAliasExprs.add(sqExprForCorr); ASTNode selExpr = SubQueryUtils.createSelectItem(conjunct.getLeftExpr(), sqExprAlias); selectClause.addChild(selExpr); + subQueryDiagnostic.addSelectClauseRewrite(conjunct.getLeftExpr(), exprAlias); numOfCorrelationExprsAddedToSQSelect++; if ( containsAggregationExprs ) { ASTNode gBy = getSubQueryGroupByAST(); SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getLeftExpr()); + subQueryDiagnostic.addGByClauseRewrite(conjunct.getLeftExpr()); } if ( notInCheck != null ) { notInCheck.addCorrExpr((ASTNode)conjunctAST.getChild(0)); } + subQueryDiagnostic.addJoinCondition(conjunctAST, corrCondLeftIsRewritten, corrCondRightIsRewritten); } else { + corrCondRightIsRewritten = true; if ( forHavingClause && conjunct.getLeftOuterColInfo() != null ) { + corrCondLeftIsRewritten = true; rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias, parentQueryRR, conjunct.getLeftOuterColInfo()); } @@ -812,17 +838,21 @@ private void rewrite(RowResolver parentQueryRR, subQueryJoinAliasExprs.add(sqExprForCorr); ASTNode selExpr = SubQueryUtils.createSelectItem(conjunct.getRightExpr(), sqExprAlias); selectClause.addChild(selExpr); + subQueryDiagnostic.addSelectClauseRewrite(conjunct.getRightExpr(), exprAlias); numOfCorrelationExprsAddedToSQSelect++; if ( containsAggregationExprs ) { ASTNode gBy = getSubQueryGroupByAST(); SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getRightExpr()); + subQueryDiagnostic.addGByClauseRewrite(conjunct.getRightExpr()); } if ( notInCheck != null ) { notInCheck.addCorrExpr((ASTNode)conjunctAST.getChild(1)); } + subQueryDiagnostic.addJoinCondition(conjunctAST, corrCondLeftIsRewritten, corrCondRightIsRewritten); } } else { sqNewSearchCond = SubQueryUtils.andAST(sqNewSearchCond, 
conjunctAST); + subQueryDiagnostic.addWhereClauseRewrite(conjunctAST); } } @@ -834,6 +864,7 @@ private void rewrite(RowResolver parentQueryRR, * left. */ sqNewSearchCond = SubQueryUtils.constructTrueCond(); + subQueryDiagnostic.addWhereClauseRewrite("1 = 1"); } whereClause.setChild(0, sqNewSearchCond); } @@ -870,6 +901,8 @@ private ASTNode getSubQueryGroupByAST() { for(ASTNode child : newChildren ) { subQueryAST.addChild(child); } + + subQueryDiagnostic.setAddGroupByClause(); return groupBy; } @@ -895,6 +928,11 @@ public int getNumOfCorrelationExprsAddedToSQSelect() { return numOfCorrelationExprsAddedToSQSelect; } + + public QBSubQueryRewrite getDiagnostic() { + return subQueryDiagnostic; + } + public QBSubQuery getSubQuery() { return this; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 0e0395e..d5acb2e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2217,7 +2217,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, /* * Clone the Search AST; apply all rewrites on the clone. 
*/ - ASTNode clonedSearchCond = (ASTNode) ParseDriver.adaptor.dupTree(searchCond); + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); for(int i=0; i < subQueries.size(); i++) { @@ -2230,6 +2230,11 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, originalSubQueryAST, ctx); + if ( !forHavingClause ) { + qb.setWhereClauseSubQueryPredicate(subQuery); + } else { + qb.setHavingClauseSubQueryPredicate(subQuery); + } String havingInputAlias = null; if ( forHavingClause ) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java index f6b70d8..b6f3748 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java @@ -155,6 +155,8 @@ public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree) switch (tree.getToken().getType()) { case HiveParser.TOK_EXPLAIN: return new ExplainSemanticAnalyzer(conf); + case HiveParser.TOK_EXPLAIN_SQ_REWRITE: + return new ExplainSQRewriteSemanticAnalyzer(conf); case HiveParser.TOK_LOAD: return new LoadSemanticAnalyzer(conf); case HiveParser.TOK_EXPORT:
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java
new file mode 100644
index 0000000..f7dae23
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java
@@ -0,0 +1,277 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import org.antlr.runtime.TokenRewriteStream;
+import org.apache.hadoop.hive.ql.Context;
+
+
+/*
+ * Contains functionality that helps with understanding how a SubQuery was rewritten.
+ */
+public class SubQueryDiagnostic {
+
+  /*
+   * Picks the recording implementation when the Context is in explain mode, and the
+   * no-op implementation otherwise.
+   */
+  static QBSubQueryRewrite getRewrite(QBSubQuery subQuery,
+      TokenRewriteStream stream,
+      Context ctx) {
+    if (ctx.getExplain()) {
+      return new QBSubQueryRewrite(subQuery, stream);
+    } else {
+      return new QBSubQueryRewriteNoop(subQuery, stream);
+    }
+  }
+
+  public static class QBSubQueryRewrite {
+    QBSubQuery subQuery;
+    TokenRewriteStream stream;
+
+    /*
+     * the rewritten where Clause
+     */
+    String whereClause;
+
+    /*
+     * any additions to the SubQueries Select Clause.
+     */
+    String selectClauseAdditions;
+
+    /*
+     * additions to the Group By Clause.
+     */
+    String gByClauseAdditions;
+    boolean addGroupByClause;
+
+    String joiningCondition;
+
+    String outerQueryPostJoinCond;
+
+
+    QBSubQueryRewrite(QBSubQuery subQuery,
+        TokenRewriteStream stream) {
+      this.subQuery = subQuery;
+      this.stream = stream;
+    }
+
+    /*
+     * Renders the SubQuery text with the recorded where, select and group by
+     * rewrites applied, followed by the SubQuery alias.
+     *
+     * The rewrites are applied through a token stream program named after the
+     * alias; the program is always deleted before returning and no recorded
+     * state is modified, so the method can be called any number of times.
+     */
+    public String getRewrittenQuery() {
+
+      ASTNode sqAST = subQuery.getSubQueryAST();
+      String program = subQuery.getAlias();
+
+      try {
+        if (whereClause != null) {
+          ASTNode whereAST = (ASTNode) sqAST.getChild(1).getChild(2);
+          stream.replace(program,
+              whereAST.getTokenStartIndex(),
+              whereAST.getTokenStopIndex(),
+              whereClause);
+        }
+
+        if (selectClauseAdditions != null) {
+          ASTNode selectClause = (ASTNode) sqAST.getChild(1).getChild(1);
+          stream.insertAfter(program,
+              selectClause.getTokenStopIndex(), selectClauseAdditions);
+        }
+
+        if (gByClauseAdditions != null) {
+          if (!addGroupByClause) {
+            ASTNode groupBy = (ASTNode) sqAST.getChild(1).getChild(3);
+            stream.insertAfter(program,
+                groupBy.getTokenStopIndex(), gByClauseAdditions);
+          }
+          else {
+            // build the clause in a local: prefixing the field itself would add
+            // another " group by " on every call
+            String groupByClause = " group by " + gByClauseAdditions;
+            stream.insertAfter(program,
+                sqAST.getTokenStopIndex() - 1, groupByClause);
+          }
+        }
+
+        return
+            stream.toString(program,
+                sqAST.getTokenStartIndex(),
+                sqAST.getTokenStopIndex())
+            + " " + program;
+      } finally {
+        stream.deleteProgram(program);
+      }
+    }
+
+    public String getJoiningCondition() {
+      return joiningCondition;
+    }
+
+    void addWhereClauseRewrite(ASTNode predicate) {
+      String cond = stream.toString(predicate.getTokenStartIndex(), predicate.getTokenStopIndex());
+      addWhereClauseRewrite(cond);
+    }
+
+    void addWhereClauseRewrite(String cond) {
+      whereClause = whereClause == null ? "where " : whereClause + " and ";
+      whereClause += cond;
+    }
+
+    void addSelectClauseRewrite(ASTNode selectExpr, String alias) {
+      if ( selectClauseAdditions == null ) {
+        selectClauseAdditions = "";
+      }
+
+      selectClauseAdditions += ", " +
+          stream.toString(selectExpr.getTokenStartIndex(), selectExpr.getTokenStopIndex()) +
+          " as " + alias;
+    }
+
+    void setAddGroupByClause() {
+      this.addGroupByClause = true;
+    }
+
+
+    void addGByClauseRewrite(ASTNode selectExpr) {
+      if ( gByClauseAdditions == null ) {
+        gByClauseAdditions = "";
+      }
+
+      if ( !addGroupByClause || !gByClauseAdditions.equals("") ) {
+        gByClauseAdditions += ", ";
+      }
+
+      gByClauseAdditions += stream.toString(
+          selectExpr.getTokenStartIndex(),
+          selectExpr.getTokenStopIndex());
+    }
+
+    /*
+     * joinCond represents a correlated predicate.
+     * leftIsRewritten, rightIsRewritten indicates if either side has been replaced by a column alias.
+     *
+     * If a side is not rewritten, we get its text from the tokenstream.
+     * For rewritten conditions we form the text based on the table and column reference.
+     */
+    void addJoinCondition(ASTNode joinCond, boolean leftIsRewritten, boolean rightIsRewritten) {
+      StringBuilder b = new StringBuilder();
+
+      if ( joiningCondition == null ) {
+        joiningCondition = " on ";
+      } else {
+        b.append(" and ");
+      }
+      addCondition(b, (ASTNode) joinCond.getChild(0), leftIsRewritten);
+      b.append(" = ");
+      addCondition(b, (ASTNode) joinCond.getChild(1), rightIsRewritten);
+
+      joiningCondition += b.toString();
+    }
+
+    private void addCondition(StringBuilder b, ASTNode cond, boolean rewritten) {
+      if ( !rewritten ) {
+        b.append(stream.toString(cond.getTokenStartIndex(), cond.getTokenStopIndex()));
+      } else {
+        addReference(b, cond);
+      }
+    }
+
+    private void addReference(StringBuilder b, ASTNode ref) {
+      if ( ref.getType() == HiveParser.DOT ) {
+        b.append(ref.getChild(0).getChild(0).getText()).
+        append(".").
+        append(ref.getChild(1).getText());
+      } else {
+        b.append(ref.getText());
+      }
+    }
+
+    void addPostJoinCondition(ASTNode cond) {
+      StringBuilder b = new StringBuilder();
+      addReference(b, (ASTNode) cond.getChild(1));
+      outerQueryPostJoinCond = b.toString() + " is null";
+    }
+
+    public String getOuterQueryPostJoinCond() {
+      return outerQueryPostJoinCond;
+    }
+  }
+
+  /*
+   * Used when the query is not being explained: recording calls do nothing and
+   * the getters are unsupported.
+   */
+  public static class QBSubQueryRewriteNoop extends QBSubQueryRewrite {
+
+    QBSubQueryRewriteNoop(QBSubQuery subQuery, TokenRewriteStream stream) {
+      super(subQuery, stream);
+    }
+
+    @Override
+    public final String getRewrittenQuery() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public final String getJoiningCondition() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    final void addWhereClauseRewrite(ASTNode predicate) {
+    }
+
+    @Override
+    final void addWhereClauseRewrite(String cond) {
+    }
+
+    @Override
+    final void addSelectClauseRewrite(ASTNode selectExpr, String alias) {
+    }
+
+    @Override
+    final void setAddGroupByClause() {
+    }
+
+    @Override
+    final void addGByClauseRewrite(ASTNode selectExpr) {
+    }
+
+    @Override
+    final void addJoinCondition(ASTNode joinCond, boolean leftIsRewritten,
+        boolean rightIsRewritten) {
+    }
+
+    @Override
+    final void addPostJoinCondition(ASTNode cond) {
+    }
+
+    @Override
+    public final String getOuterQueryPostJoinCond() {
+      throw new UnsupportedOperationException();
+    }
+
+  }
+
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java index 07d32ed..089ad78 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; +import org.antlr.runtime.tree.CommonTreeAdaptor; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import 
org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -630,6 +631,12 @@ static ASTNode buildNotInNullJoinCond(String subqueryAlias, String cntAlias) { public String getOuterQueryId(); }; + + /* + * Using CommonTreeAdaptor because the Adaptor in ParseDriver doesn't carry + * the token indexes when duplicating a Tree. + */ + static final CommonTreeAdaptor adaptor = new CommonTreeAdaptor(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainSQRewriteWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainSQRewriteWork.java new file mode 100644 index 0000000..a7a4054 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainSQRewriteWork.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; + +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.QB; +/** + * Serializable holder for the four inputs of a SubQuery rewrite explain: + * the result file name, the query block, the query AST and the Context. + * The class has no setters; values are supplied through the four argument + * constructor and read back through the getters. + * NOTE(review): presumably consumed by ExplainSQRewriteTask, which imports this + * class and reads these same four getters from its work object; confirm. + */ +public class ExplainSQRewriteWork implements Serializable { + private static final long serialVersionUID = 1L; + /* + * resFile: name of the result file. NOTE(review): ExplainSQRewriteTask appears to + * wrap it in a Path and create that file for its output; confirm. + * qb, ast, ctx: query block, AST and Context handed over unchanged by the getters. + */ + private String resFile; + private QB qb; + private ASTNode ast; + private Context ctx; + + /** No argument constructor; all four fields are left null. */ + public ExplainSQRewriteWork() { + } + /** + * Stores the given values as is; no validation or copying is performed. + * + * @param resFile name of the result file + * @param qb query block + * @param ast AST of the query + * @param ctx Context of the query + */ + public ExplainSQRewriteWork(String resFile, QB qb, ASTNode ast, Context ctx) { + this.resFile = resFile; + this.qb = qb; + this.ast = ast; + this.ctx = ctx; + } + /** @return the result file name, or null if none was supplied */ + public String getResFile() { + return resFile; + } + /** @return the query block, or null if none was supplied */ + public QB getQb() { + return qb; + } + /** @return the query AST, or null if none was supplied */ + public ASTNode getAst() { + return ast; + } + /** @return the Context, or null if none was supplied */ + public Context getCtx() { + return ctx; + } + +} diff --git ql/src/test/queries/clientpositive/subquery_exists_explain_rewrite.q ql/src/test/queries/clientpositive/subquery_exists_explain_rewrite.q new file mode 100644 index 0000000..e3b698d --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_exists_explain_rewrite.q @@ -0,0 +1,22 @@ +-- no agg, corr +explain subquery rewrite +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +; + +-- sq in from +explain subquery rewrite +select * +from (select * + from src b + where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_in_explain_rewrite.q ql/src/test/queries/clientpositive/subquery_in_explain_rewrite.q new file mode 100644 index 0000000..5e731ff --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_in_explain_rewrite.q @@ -0,0 +1,109 @@ +DROP TABLE part; + +-- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + 
p_retailprice DOUBLE, + p_comment STRING +); + +DROP TABLE lineitem; +CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + + +-- non agg, non corr +explain subquery rewrite + select * +from src +where src.key in (select key from src s1 where s1.key > '9') +; + +-- non agg, corr +explain subquery rewrite +select * +from src b +where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +; + +-- agg, non corr +explain subquery rewrite +select p_name, p_size +from +part where part.p_size in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +; + +-- agg, corr +explain subquery rewrite +select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +; + +-- distinct, corr +explain subquery rewrite +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) +; + +-- non agg, non corr, windowing +explain subquery rewrite +select p_mfgr, p_name, p_size +from part +where part.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) +; + +-- non agg, non corr, with join in Parent Query +explain subquery rewrite +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +; + 
+-- non agg, corr, with join in Parent Query +explain subquery rewrite +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +; diff --git ql/src/test/results/clientpositive/subquery_exists_explain_rewrite.q.out ql/src/test/results/clientpositive/subquery_exists_explain_rewrite.q.out new file mode 100644 index 0000000..0a44131 --- /dev/null +++ ql/src/test/results/clientpositive/subquery_exists_explain_rewrite.q.out @@ -0,0 +1,76 @@ +PREHOOK: query: -- no agg, corr +explain subquery rewrite +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- no agg, corr +explain subquery rewrite +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +POSTHOOK: type: QUERY + +Where Clause Rewritten SubQuery: +(select a.key, a.value as sq_corr_0, a.key as sq_corr_1 + from src a + where a.value > 'val_9' + ) sq_1 + +Where Clause SubQuery Joining Condition: + on b.value = sq_1.sq_corr_0 and sq_1.sq_corr_1 = b.key + +Rewritten Query: +select * +from src b left semi join (select a.key, a.value as sq_corr_0, a.key as sq_corr_1 + from src a + where a.value > 'val_9' + ) sq_1 on b.value = sq_1.sq_corr_0 and sq_1.sq_corr_1 = b.key +where 1 = 1 +PREHOOK: query: -- sq in from +explain subquery rewrite +select * +from (select * + from src b + where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +PREHOOK: type: QUERY +POSTHOOK: query: -- sq in from +explain subquery rewrite +select * +from (select * + from src b + where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and 
a.value > 'val_9') + ) a +POSTHOOK: type: QUERY + +Where Clause for Query Block 'a' Rewritten SubQuery: +(select a.key, a.value as sq_corr_0, a.key as sq_corr_1 + from src a + where a.value > 'val_9') sq_1 + +Where Clause for Query Block 'a' SubQuery Joining Condition: + on b.value = sq_1.sq_corr_0 and sq_1.sq_corr_1 = b.key + +Rewritten Query: +select * +from (select * + from src b left semi join (select a.key, a.value as sq_corr_0, a.key as sq_corr_1 + from src a + where a.value > 'val_9') sq_1 on b.value = sq_1.sq_corr_0 and sq_1.sq_corr_1 = b.key + where 1 = 1 + ) a diff --git ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out new file mode 100644 index 0000000..9423b0a --- /dev/null +++ ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out @@ -0,0 +1,323 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE lineitem +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE lineitem +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE 
STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem +PREHOOK: query: -- non agg, non corr +explain subquery rewrite + select * +from src +where src.key in (select key from src s1 where s1.key > '9') +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr +explain subquery rewrite + select * +from src +where src.key in (select key from src s1 where s1.key > '9') +POSTHOOK: type: QUERY + +Where Clause Rewritten SubQuery: +(select key from src s1 where s1.key > '9') sq_1 + +Where Clause SubQuery Joining Condition: + on src.key = sq_1.key + +Rewritten Query: +select * +from src left semi join (select key from src s1 where s1.key > '9') sq_1 on src.key = sq_1.key +where 1 = 1 +PREHOOK: query: -- non agg, corr +explain subquery rewrite +select * +from src b +where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr +explain subquery rewrite +select * +from src b +where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +POSTHOOK: type: QUERY + +Where Clause Rewritten SubQuery: +(select a.key, a.value as sq_corr_0 + from src a + where a.key > '9' + ) sq_1 + +Where Clause SubQuery Joining Condition: + on b.value = sq_1.sq_corr_0 and b.key = sq_1.key + +Rewritten Query: +select * 
+from src b left semi join (select a.key, a.value as sq_corr_0 + from src a + where a.key > '9' + ) sq_1 on b.value = sq_1.sq_corr_0 and b.key = sq_1.key +where 1 = 1 +PREHOOK: query: -- agg, non corr +explain subquery rewrite +select p_name, p_size +from +part where part.p_size in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, non corr +explain subquery rewrite +select p_name, p_size +from +part where part.p_size in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +POSTHOOK: type: QUERY + +Where Clause Rewritten SubQuery: +(select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) sq_1 + +Where Clause SubQuery Joining Condition: + on part.p_size = sq_1._c0 + +Rewritten Query: +select p_name, p_size +from +part left semi join (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) sq_1 on part.p_size = sq_1._c0 where 1 = 1 +PREHOOK: query: -- agg, corr +explain subquery rewrite +select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, corr +explain subquery rewrite +select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY + +Where Clause Rewritten SubQuery: +(select min(p_size), a.p_mfgr as sq_corr_0 + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + 
group by group by a.p_mfgr) sq_1 + +Where Clause SubQuery Joining Condition: + on b.p_mfgr = sq_1.sq_corr_0 and b.p_size = sq_1._c0 + +Rewritten Query: +select p_mfgr, p_name, p_size +from part b left semi join (select min(p_size), a.p_mfgr as sq_corr_0 + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + group by a.p_mfgr) sq_1 on b.p_mfgr = sq_1.sq_corr_0 and b.p_size = sq_1._c0 where 1 = 1 +PREHOOK: query: -- distinct, corr +explain subquery rewrite +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- distinct, corr +explain subquery rewrite +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) +POSTHOOK: type: QUERY + +Where Clause Rewritten SubQuery: +(select distinct a.key, a.value as sq_corr_0 + from src a + where a.key > '9' + ) sq_1 + +Where Clause SubQuery Joining Condition: + on b.value = sq_1.sq_corr_0 and b.key = sq_1.key + +Rewritten Query: +select * +from src b left semi join (select distinct a.key, a.value as sq_corr_0 + from src a + where a.key > '9' + ) sq_1 on b.value = sq_1.sq_corr_0 and b.key = sq_1.key +where 1 = 1 +PREHOOK: query: -- non agg, non corr, windowing +explain subquery rewrite +select p_mfgr, p_name, p_size +from part +where part.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr, windowing +explain subquery rewrite +select p_mfgr, p_name, p_size +from part +where part.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) +POSTHOOK: type: QUERY + +Where Clause Rewritten SubQuery: +(select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 + +Where Clause SubQuery Joining Condition: + on part.p_size = sq_1._wcol0 + +Rewritten 
Query: +select p_mfgr, p_name, p_size +from part left semi join (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 on part.p_size = sq_1._wcol0 +where 1 = 1 +PREHOOK: query: -- non agg, non corr, with join in Parent Query +explain subquery rewrite +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr, with join in Parent Query +explain subquery rewrite +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +POSTHOOK: type: QUERY + +Where Clause Rewritten SubQuery: +(select l_orderkey from lineitem where l_shipmode = 'AIR') sq_1 + +Where Clause SubQuery Joining Condition: + on li.l_orderkey = sq_1.l_orderkey + +Rewritten Query: +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey left semi join (select l_orderkey from lineitem where l_shipmode = 'AIR') sq_1 on li.l_orderkey = sq_1.l_orderkey +where li.l_linenumber = 1 and + 1 = 1 +PREHOOK: query: -- non agg, corr, with join in Parent Query +explain subquery rewrite +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr, with join in Parent Query +explain subquery rewrite +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from 
lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +POSTHOOK: type: QUERY + +Where Clause Rewritten SubQuery: +(select l_orderkey, l_linenumber as sq_corr_0 from lineitem where l_shipmode = 'AIR') sq_1 + +Where Clause SubQuery Joining Condition: + on sq_1.sq_corr_0 = li.l_linenumber and li.l_orderkey = sq_1.l_orderkey + +Rewritten Query: +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey left semi join (select l_orderkey, l_linenumber as sq_corr_0 from lineitem where l_shipmode = 'AIR') sq_1 on sq_1.sq_corr_0 = li.l_linenumber and li.l_orderkey = sq_1.l_orderkey +where li.l_linenumber = 1 and + 1 = 1