diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index a5a867a..35b1865 100644
--- ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -80,6 +80,7 @@
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.ASTRewriter;
 import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
@@ -418,6 +419,12 @@ public int compile(String command, boolean resetTaskIds) {
       ASTNode tree = pd.parse(command, ctx);
       tree = ParseUtils.findRootNonNullToken(tree);
 
+      // Apply rewrites to the AST.
+      ASTRewriter rewrite = new ASTRewriter();
+      if (rewrite.inSubqueryToJoin(tree) && LOG.isDebugEnabled()) {
+        LOG.debug("rewritten AST tree=" + tree.dump());
+      }
+
       BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
       List<AbstractSemanticAnalyzerHook> saHooks =
           getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK,
diff --git ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java
index bd83c88..c6973e1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java
@@ -41,4 +41,15 @@
    */
   Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       Object... nodeOutputs) throws SemanticException;
+
+  /**
+   * Simplest implementation of NodeProcessor; it does nothing.
+   */
+  public static class NoOp implements NodeProcessor {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      return null;
+    }
+  }
 }
+
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
index 7dbd96f..79f5ce7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
@@ -20,7 +20,9 @@
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.List;
 
+import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.Token;
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
@@ -47,6 +49,14 @@ public ASTNode(Token t) {
     super(t);
   }
 
+  public ASTNode(int tokenType, String text) {
+    this(new CommonToken(tokenType, text));
+  }
+
+  public ASTNode(int tokenType) {
+    this(tokenType, ParseUtils.getTokenName(tokenType));
+  }
+
   public ASTNode(ASTNode node) {
     super(node);
     this.origin = node.origin;
@@ -94,6 +104,26 @@ public ASTNodeOrigin getOrigin() {
   }
 
   /**
+   * This method has the same functionality as
+   * {@link org.antlr.runtime.tree.BaseTree#addChild} except that it returns
+   * a reference to itself. Useful for chaining.
+   */
+  public ASTNode addASTChild(ASTNode childTree) {
+    super.addChild(childTree);
+    return this;
+  }
+
+  /**
+   * This method has the same functionality as
+   * {@link org.antlr.runtime.tree.BaseTree#addChildren} except that it returns
+   * a reference to itself. Useful for chaining.
+   */
+  public ASTNode addASTChildren(List<ASTNode> kids) {
+    super.addChildren(kids);
+    return this;
+  }
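+
+  // A minimal chaining sketch (hypothetical nodes; ASTRewriter.buildNewAST
+  // assembles its replacement tree the same way):
+  //
+  //   ASTNode from = new ASTNode(HiveParser.TOK_FROM)
+  //       .addASTChild(new ASTNode(HiveParser.TOK_TABREF));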
+
+  /**
    * Tag this ASTNode with information about the object from which this node
    * originated.
    */
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ASTRewriter.java ql/src/java/org/apache/hadoop/hive/ql/parse/ASTRewriter.java
new file mode 100644
index 0000000..e45f415
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ASTRewriter.java
@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * Provides rewrite rules to transform an Abstract Syntax Tree to a preferred,
+ * but semantically equivalent, form.
+ */
+public class ASTRewriter {
+
+  static final private Log LOG = LogFactory.getLog(ASTRewriter.class.getName());
+  static final private LogHelper console = new LogHelper(LOG);
+
+  static final private String RE_QUERY_WHERE_FUNCTION =
+      HiveParser.TOK_QUERY + "%" +
+      HiveParser.TOK_INSERT + "%" +
+      HiveParser.TOK_WHERE + "%" +
+      // TODO(mweaver 14Jun2013): Enable this for more complex queries.
+      // "(" + HiveParser.TOK_OP_AND + "|" +
+      // HiveParser.TOK_OP_OR + "%)*" +
+      HiveParser.TOK_FUNCTION + "%";
+
+  private ASTNode outerKeyAST = null;
+  private ASTNode subqueryAST = null;
+  private ASTNode outerSelectAST = null;
+  private ASTNode srcTablerefAST = null;
+  private ASTNode destinationAST = null;
+
+  /**
+   * Transforms a query with a condition involving
+   *
+   *   WHERE x IN (<column subquery>)
+   *
+   * into an equivalent JOIN query.
+   *
+   * @param tree The query's abstract syntax tree, which will be modified if
+   *        this query is eligible for rewrite.
+   * @return true if the query was rewritten, false if not.
+   *
+   * The rule processors perform two roles: (1) determine whether a query may
+   * be eligible for rewrite; and, in case there is a rewrite, (2) save the
+   * subtrees of the AST that will get re-used in the rewrite.
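+   *
+   * For example, the new clientpositive/subquery_in_where.q test (the SQL
+   * after the join is an approximation; the authoritative form is the
+   * rewritten AST in the matching .q.out file) turns
+   *
+   *   FROM src SELECT value
+   *   WHERE src.key IN (SELECT k FROM a_few_keys WHERE k < 10)
+   *
+   * into the equivalent of
+   *
+   *   FROM src LEFT SEMI JOIN
+   *     (SELECT k FROM a_few_keys WHERE k < 10) t2 ON (src.key = t2.k)
+   *   SELECT value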
+   */
+  // FIXME(mweaver 14Jun2013): Will have to be smarter to handle compound
+  // queries -- ensure that TOK_QUERY is the same one (and not a subquery or
+  // higher-level query) for all these rules.
+  public boolean inSubqueryToJoin(ASTNode tree) throws SemanticException {
+    ASTNode queryNode = tree;
+    if (queryNode.getType() != HiveParser.TOK_QUERY) {
+      queryNode = (ASTNode) queryNode.getFirstChildWithType(HiveParser.TOK_QUERY);
+      if (queryNode == null) {
+        return false;
+      }
+    }
+
+    Logger ruleRegexpLogger = null;
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Begin inSubqueryToJoin, dump AST tree=" + tree.dump());
+      // ruleRegexpLogger = Logger.getLogger(RuleRegExp.class);
+      // ruleRegexpLogger.setLevel(Level.DEBUG);
+    }
+
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    // These rules are mutually exclusive, so 'cost' is either positive (there
+    // is a match) or -1 (no match, so don't apply the rule).
+
+    // Determines whether the query's WHERE clause contains '<key> IN', and if
+    // so stores a reference to <key> in outerKeyAST { not sure if we can do
+    // that here in one rule }.
+    opRules.put(new RuleRegExp("R1_whereKeyIn",
+        "^" + RE_QUERY_WHERE_FUNCTION +
+        HiveParser.KW_IN + "%$"),
+        getHasWhereInProcessor());
+    // Determines the existence of a subquery in WHERE, and stores a reference
+    // to its entire subtree in subqueryAST.
+    opRules.put(new RuleRegExp("R2_subquery",
+        "^" + RE_QUERY_WHERE_FUNCTION +
+        HiveParser.TOK_SUBQUERY + "%" +
+        HiveParser.TOK_QUERY + "%"),
+        getSubqueryProcessor());
+    // Locates the top-level SELECT clause and stores a reference to it in
+    // outerSelectAST.
+    opRules.put(new RuleRegExp("R3_outerSelect",
+        "^" + HiveParser.TOK_QUERY + "%" +
+        HiveParser.TOK_INSERT + "%" +
+        HiveParser.TOK_SELECT + "%"),
+        getOuterSelectProcessor());
+    // Locates the top-level FROM clause and stores its table reference in
+    // srcTablerefAST.
+    opRules.put(new RuleRegExp("R4_srcTableRef",
+        "^" + HiveParser.TOK_QUERY + "%" +
+        HiveParser.TOK_FROM + "%" +
+        HiveParser.TOK_TABREF + "%"),
+        getSrcTableProcessor());
+    // Locates the top-level INSERT clause and stores its destination in
+    // destinationAST.
+    opRules.put(new RuleRegExp("R5_destination",
+        "^" + HiveParser.TOK_QUERY + "%" +
+        HiveParser.TOK_INSERT + "%" +
+        HiveParser.TOK_DESTINATION + "%"),
+        getDestinationProcessor());
+
+    GraphWalker graphWalker = new DefaultGraphWalker(
+        new DefaultRuleDispatcher(new NodeProcessor.NoOp(),
+            opRules, null));
+
+    try {
+      graphWalker.startWalking(Collections.singletonList((Node) queryNode), null);
+    } catch (SemanticException e) {
+      LOG.error("Bad rule match on AST=" + tree.dump());
+      throw e;
+    } finally {
+      if (ruleRegexpLogger != null) {
+        ruleRegexpLogger.setLevel(null);
+      }
+    }
+
+    if (subqueryAST != null) {
+      assert (outerKeyAST != null && outerSelectAST != null &&
+          srcTablerefAST != null && destinationAST != null);
+      buildNewAST(queryNode);
+      return true;
+    } else {
+      LOG.debug("inSubqueryToJoin, no rewrite possible");
+    }
+    return false;
+  }
+
+  /**
+   * Revises the children of 'tree'.
+   */
+  private void buildNewAST(ASTNode tree) throws SemanticException {
+    assert (tree.getType() == HiveParser.TOK_QUERY);
+    // Preserve the root node (child of some unknown parent), but replace its
+    // child FROM and INSERT clauses.
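+    //
+    // The replacement children have this shape ("t2" and "k" are the alias
+    // and column name that this first cut hard-codes; angle brackets mark
+    // the subtrees saved by the rule processors):
+    //
+    //   TOK_FROM
+    //     TOK_LEFTSEMIJOIN
+    //       <srcTablerefAST>
+    //       TOK_SUBQUERY <subqueryAST> t2
+    //       EQUAL <outerKeyAST> (DOT (TOK_TABLE_OR_COL t2) k)
+    //   TOK_INSERT
+    //     <destinationAST>
+    //     <outerSelectAST>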
+    for (int i = tree.getChildCount() - 1; i >= 0; i--) {
+      int tType = tree.getChild(i).getType();
+      if (tType == HiveParser.TOK_FROM || tType == HiveParser.TOK_INSERT) {
+        tree.deleteChild(i);
+      }
+    }
+    tree.addASTChildren(
+        Arrays.asList(
+            new ASTNode(HiveParser.TOK_FROM).addASTChild(
+                new ASTNode(HiveParser.TOK_LEFTSEMIJOIN).addASTChildren(
+                    Arrays.asList(
+                        srcTablerefAST,
+                        new ASTNode(HiveParser.TOK_SUBQUERY).addASTChildren(
+                            Arrays.asList(
+                                subqueryAST,
+                                new ASTNode(HiveParser.Identifier, "t2"))),
+                        new ASTNode(HiveParser.EQUAL).addASTChildren(
+                            Arrays.asList(
+                                outerKeyAST,
+                                new ASTNode(HiveParser.DOT).addASTChildren(
+                                    Arrays.asList(
+                                        new ASTNode(HiveParser.TOK_TABLE_OR_COL).addASTChild(
+                                            new ASTNode(HiveParser.Identifier, "t2")),
+                                        new ASTNode(HiveParser.Identifier, "k")))))))),
+            new ASTNode(HiveParser.TOK_INSERT).addASTChildren(
+                Arrays.asList(destinationAST,
+                    outerSelectAST))));
+  }
+
+  // NodeProcessor implementations, declared anonymously.
+  NodeProcessor getHasWhereInProcessor() {
+    return new NodeProcessor() {
+      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+          Object... nodeOutputs) throws SemanticException {
+        if (outerKeyAST == null) {
+          // Store the <key> node.
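+          // Rule R1 fires with the KW_IN token on top of the walker's stack,
+          // so the entry just below the top is the enclosing TOK_FUNCTION
+          // node, and that node's second child is the <key> expression on
+          // the left-hand side of IN.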
+          ASTNode functionAST = (ASTNode) stack.get(stack.size() - 2);
+          if (functionAST.getType() != HiveParser.TOK_FUNCTION) {
+            String msg = "Error in AST rewrite, incorrect match for function";
+            LOG.error(msg + ": functionAST=" + functionAST.dump());
+            throw new SemanticException(msg);
+          }
+          outerKeyAST = (ASTNode) functionAST.getChild(1);
+          if (!(outerKeyAST.getType() == HiveParser.DOT ||
+              outerKeyAST.getType() == HiveParser.TOK_TABLE_OR_COL)) {
+            String msg = "Error in AST rewrite, incorrect match for '<key> IN'";
+            LOG.error(msg + ": outerKeyAST=" + outerKeyAST.dump());
+            throw new SemanticException(msg);
+          }
+        }
+        return null;
+      }
+    };
+  }
+
+  NodeProcessor getSubqueryProcessor() {
+    return new NodeProcessor() {
+      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+          Object... nodeOutputs) throws SemanticException {
+        if (subqueryAST == null) {
+          subqueryAST = (ASTNode) nd;
+        }
+        return null;
+      }
+    };
+  }
+
+  NodeProcessor getOuterSelectProcessor() {
+    return new NodeProcessor() {
+      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+          Object... nodeOutputs) throws SemanticException {
+        if (outerSelectAST == null) {
+          outerSelectAST = (ASTNode) nd;
+        }
+        return null;
+      }
+    };
+  }
+
+  NodeProcessor getSrcTableProcessor() {
+    return new NodeProcessor() {
+      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+          Object... nodeOutputs) throws SemanticException {
+        if (srcTablerefAST == null) {
+          srcTablerefAST = (ASTNode) nd;
+        }
+        return null;
+      }
+    };
+  }
+
+  NodeProcessor getDestinationProcessor() {
+    return new NodeProcessor() {
+      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+          Object... nodeOutputs) throws SemanticException {
+        if (destinationAST == null) {
+          destinationAST = (ASTNode) nd;
+        }
+        return null;
+      }
+    };
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 8a5ef0e..000427a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -380,6 +380,15 @@ precedenceEqualExpression
 expressions
     :
     LPAREN expression (COMMA expression)* RPAREN -> expression*
+    |
+    subQuery
+    ;
+
+subQuery
+@init { gParent.msgs.push("column subquery"); }
+@after { gParent.msgs.pop(); }
+    :
+    LPAREN queryStatementExpression RPAREN -> ^(TOK_SUBQUERY queryStatementExpression)
     ;
 
 precedenceNotOperator
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index cd9f693..779d9a7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -20,14 +20,14 @@
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.ErrorMsg;
-
-import java.util.Iterator;
-import java.util.Map;
-
 import org.apache.hadoop.hive.ql.metadata.Table;
@@ -37,6 +37,8 @@
  */
 public final class ParseUtils {
 
+  static final private Log LOG = LogFactory.getLog(ParseUtils.class);
+
   /**
    * Tests whether the parse tree node is a join token.
    *
@@ -100,4 +102,52 @@ private ParseUtils() {
     }
     return colNames;
   }
+
+  static final int TOK_INDEX_OFFSET = getTokenIndexOffset();
+  static final int FIRST_TOK_INDEX = getFirstTokenIndex();
+
+  private static int getTokenIndexOffset() {
+    // This is, um, flaky. No idea why Antlr doesn't generate a
+    // getTokenName() method.
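+    //
+    // Antlr reports imaginary TOK_* types with constants that are offset
+    // from their positions in tokenNames, so scan for a known name and
+    // compute the offset once. Hypothetical illustration: if tokenNames[640]
+    // is "TOK_ALIASLIST" while HiveParser.TOK_ALIASLIST is 650, the offset
+    // is 10 and every TOK_* type maps to tokenNames[type - 10].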
+    int tokAliasListIndex = HiveParser.WS;
+    while (tokAliasListIndex < HiveParser.tokenNames.length
+        && !HiveParser.tokenNames[tokAliasListIndex].equals("TOK_ALIASLIST")) {
+      tokAliasListIndex++;
+    }
+    if (tokAliasListIndex >= HiveParser.tokenNames.length) {
+      throw new RuntimeException("getTokenIndexOffset() failed initialization");
+    }
+    int offset = HiveParser.TOK_ALIASLIST - tokAliasListIndex;
+    assert (HiveParser.tokenNames[HiveParser.TOK_WHERE - offset]
+        .equals("TOK_WHERE"));
+    return offset;
+  }
+
+  private static int getFirstTokenIndex() {
+    int firstTokenIndex = HiveParser.WS;
+    while (firstTokenIndex < HiveParser.tokenNames.length
+        && !HiveParser.tokenNames[firstTokenIndex].startsWith("TOK_")) {
+      firstTokenIndex++;
+    }
+    if (firstTokenIndex >= HiveParser.tokenNames.length) {
+      throw new RuntimeException("getFirstTokenIndex() failed initialization");
+    }
+    LOG.debug("firstTokenIndex=" + firstTokenIndex);
+    return firstTokenIndex;
+  }
+
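+  /**
+   * Returns the display name for a token type: for real tokens this is just
+   * tokenNames[tokenType], e.g. "KW_AND"; for imaginary tokens the type is
+   * shifted by TOK_INDEX_OFFSET first, so getTokenName(HiveParser.TOK_ALIASLIST)
+   * returns "TOK_ALIASLIST". See TestGetTokenName for more examples.
+   */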
+  public static String getTokenName(int tokenType) {
+    if (tokenType >= 0) {
+      if (tokenType < FIRST_TOK_INDEX) {
+        return HiveParser.tokenNames[tokenType];
+      } else {
+        int tokenIndex = tokenType - TOK_INDEX_OFFSET;
+        if (tokenIndex < HiveParser.tokenNames.length) {
+          return HiveParser.tokenNames[tokenIndex];
+        }
+      }
+    }
+    LOG.error("Bad tokenType: " + tokenType);
+    return "";
+  }
 }
diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestGetTokenName.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestGetTokenName.java
new file mode 100644
index 0000000..5e8fc53
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestGetTokenName.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import junit.framework.TestCase;
+import static junit.framework.Assert.*;
+
+public class TestGetTokenName extends TestCase {
+
+  @Override
+  protected void setUp() {
+  }
+
+  @Override
+  protected void tearDown() {
+  }
+
+  protected void check(int tokenType, String expectedName) {
+    assertEquals("tokenType=" + tokenType,
+        expectedName, ParseUtils.getTokenName(tokenType));
+  }
+
+  public void testSomeKeywords() throws Exception {
+    check(HiveParser.AMPERSAND, "AMPERSAND");
+    check(HiveParser.DecimalLiteral, "DecimalLiteral");
+    check(HiveParser.KW_AND, "KW_AND");
+  }
+
+  public void testSomeTokens() throws Exception {
+    check(HiveParser.TOK_LEFTOUTERJOIN, "TOK_LEFTOUTERJOIN");
+    check(HiveParser.TOK_WINDOWVALUES, "TOK_WINDOWVALUES");
+    check(HiveParser.TOK_ALIASLIST, "TOK_ALIASLIST");
+  }
+}
diff --git ql/src/test/queries/clientpositive/subquery_in_where.q ql/src/test/queries/clientpositive/subquery_in_where.q
new file mode 100644
index 0000000..686f554
--- /dev/null
+++ ql/src/test/queries/clientpositive/subquery_in_where.q
@@ -0,0 +1,11 @@
+
+DROP TABLE a_few_keys;
+CREATE TABLE a_few_keys(k INT);
+INSERT OVERWRITE TABLE a_few_keys SELECT key FROM src WHERE key IN (1, 4, 5, 9, 11, 15);
+
+EXPLAIN
+FROM src SELECT value
+WHERE src.key IN ( SELECT k FROM a_few_keys WHERE k < 10 );
+
+FROM src SELECT value
+WHERE src.key IN ( SELECT k FROM a_few_keys WHERE k < 10 );
diff --git ql/src/test/results/clientpositive/subquery_in_where.q.out ql/src/test/results/clientpositive/subquery_in_where.q.out
new file mode 100644
index 0000000..cdf07b8
--- /dev/null
+++ ql/src/test/results/clientpositive/subquery_in_where.q.out
@@ -0,0 +1,128 @@
+PREHOOK: query: DROP TABLE a_few_keys
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE a_few_keys
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE a_few_keys(k INT)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE a_few_keys(k INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@a_few_keys
+PREHOOK: query: INSERT OVERWRITE TABLE a_few_keys SELECT key FROM src WHERE key IN (1, 4, 5, 9, 11, 15)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@a_few_keys
+POSTHOOK: query: INSERT OVERWRITE TABLE a_few_keys SELECT key FROM src WHERE key IN (1, 4, 5, 9, 11, 15)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@a_few_keys
+POSTHOOK: Lineage: a_few_keys.k EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+_col0
+PREHOOK: query: EXPLAIN
+FROM src SELECT value
+WHERE src.key IN ( SELECT k FROM a_few_keys WHERE k < 10 )
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src SELECT value
+WHERE src.key IN ( SELECT k FROM a_few_keys WHERE k < 10 )
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: a_few_keys.k EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+Explain
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME a_few_keys))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k))) (TOK_WHERE (< (TOK_TABLE_OR_COL k) 10)))) t2) (EQUAL (. (TOK_TABLE_OR_COL src) key) (DOT (TOK_TABLE_OR_COL t2) k)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Reduce Output Operator
+              key expressions:
+                    expr: UDFToDouble(key)
+                    type: double
+              sort order: +
+              Map-reduce partition columns:
+                    expr: UDFToDouble(key)
+                    type: double
+              tag: 0
+              value expressions:
+                    expr: value
+                    type: string
+        t2:a_few_keys
+          TableScan
+            alias: a_few_keys
+            Filter Operator
+              predicate:
+                  expr: (k < 10)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: k
+                      type: int
+                outputColumnNames: _col0
+                Group By Operator
+                  bucketGroup: false
+                  keys:
+                        expr: _col0
+                        type: int
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    key expressions:
+                          expr: UDFToDouble(_col0)
+                          type: double
+                    sort order: +
+                    Map-reduce partition columns:
+                          expr: UDFToDouble(_col0)
+                          type: double
+                    tag: 1
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          condition expressions:
+            0 {VALUE._col1}
+            1
+          handleSkewJoin: false
+          outputColumnNames: _col1
+          Select Operator
+            expressions:
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: FROM src SELECT value
+WHERE src.key IN ( SELECT k FROM a_few_keys WHERE k < 10 )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a_few_keys
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: FROM src SELECT value
+WHERE src.key IN ( SELECT k FROM a_few_keys WHERE k < 10 )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a_few_keys
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: Lineage: a_few_keys.k EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+value
+val_4
+val_5
+val_5
+val_5
+val_9