diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index 753df79a..b9c7e6f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -10,14 +10,15 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
-public class QBSubQuery {
-
+public class QBSubQuery implements ISubQueryJoinInfo {
+
   public static enum SubQueryType {
     EXISTS,
     NOT_EXISTS,
@@ -332,6 +333,83 @@ protected ASTNode firstDot(ASTNode dot) {
   }
 
+  /*
+   * When transforming a Not In SubQuery we need to check for nulls in the
+   * joining expressions of the SubQuery. If there are nulls then the SubQuery always
+   * returns false. For more details see
+   * https://issues.apache.org/jira/secure/attachment/12614003/SubQuerySpec.pdf
+   *
+   * Basically, SQL semantics say that:
+   * - R1.A not in (null, 1, 2, ...)
+   *   is always false.
+   * A 'not in' operator is equivalent to a '<> all'. Since a not-equal check with null
+   * returns false, a not in predicate against a set with a 'null' value always returns false.
+   *
+   * So for not in SubQuery predicates:
+   * - we join in a null count predicate.
+   * - And the joining condition is that the 'Null Count' query has a count of 0.
+   */
+  class NotInCheck implements ISubQueryJoinInfo {
+
+    private static final String CNT_ALIAS = "c1";
+
+    /*
+     * expressions in SubQ that are joined to the Outer Query.
+     */
+    List<ASTNode> subQryCorrExprs;
+
+    /*
+     * row resolver of the SubQuery.
+     * Set by the SemanticAnalyzer after the Plan for the SubQuery is generated.
+     * This is needed in case the SubQuery select list contains a TOK_ALLCOLREF
+     */
+    RowResolver sqRR;
+
+    NotInCheck() {
+      subQryCorrExprs = new ArrayList<ASTNode>();
+    }
+
+    void addCorrExpr(ASTNode corrExpr) {
+      subQryCorrExprs.add(corrExpr);
+    }
+
+    public ASTNode getSubQueryAST() {
+      return SubQueryUtils.buildNotInNullCheckQuery(
+          QBSubQuery.this.getSubQueryAST(),
+          QBSubQuery.this.getAlias(),
+          CNT_ALIAS,
+          subQryCorrExprs,
+          sqRR);
+    }
+
+    public String getAlias() {
+      return QBSubQuery.this.getAlias() + "_notin_nullcheck";
+    }
+
+    public JoinType getJoinType() {
+      return JoinType.LEFTSEMI;
+    }
+
+    public ASTNode getJoinConditionAST() {
+      return
+          SubQueryUtils.buildNotInNullJoinCond(getAlias(), CNT_ALIAS);
+    }
+
+    public QBSubQuery getSubQuery() {
+      return QBSubQuery.this;
+    }
+
+    public String getOuterQueryId() {
+      return QBSubQuery.this.getOuterQueryId();
+    }
+
+    void setSQRR(RowResolver sqRR) {
+      this.sqRR = sqRR;
+    }
+
+  }
+
   private final String outerQueryId;
   private final int sqIdx;
   private final String alias;
@@ -355,6 +433,8 @@ protected ASTNode firstDot(ASTNode dot) {
   private boolean groupbyAddedToSQ;
   private int numOuterCorrExprsForHaving;
+
+  private NotInCheck notInCheck;
 
   public QBSubQuery(String outerQueryId,
       int sqIdx,
@@ -377,6 +457,10 @@ public QBSubQuery(String outerQueryId,
     originalSQASTOrigin = new ASTNodeOrigin("SubQuery", alias, s, alias, originalSQAST);
     numOfCorrelationExprsAddedToSQSelect = 0;
     groupbyAddedToSQ = false;
+
+    if ( operator.getType() == SubQueryType.NOT_IN ) {
+      notInCheck = new NotInCheck();
+    }
   }
 
   public ASTNode getSubQueryAST() {
@@ -655,6 +739,9 @@ private void rewrite(RowResolver parentQueryRR,
           ASTNode gBy = getSubQueryGroupByAST();
           SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getLeftExpr());
         }
+        if ( notInCheck != null ) {
+          notInCheck.addCorrExpr((ASTNode)conjunctAST.getChild(0));
+        }
       } else {
         if ( forHavingClause && conjunct.getLeftOuterColInfo() != null ) {
           rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias,
@@ -671,6 +758,9 @@ private void rewrite(RowResolver parentQueryRR,
           ASTNode gBy = getSubQueryGroupByAST();
           SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getRightExpr());
         }
+        if ( notInCheck != null ) {
+          notInCheck.addCorrExpr((ASTNode)conjunctAST.getChild(1));
+        }
       }
     } else {
       sqNewSearchCond = SubQueryUtils.andAST(sqNewSearchCond, conjunctAST);
@@ -746,6 +836,14 @@ public int getNumOfCorrelationExprsAddedToSQSelect() {
     return numOfCorrelationExprsAddedToSQSelect;
   }
 
+  public QBSubQuery getSubQuery() {
+    return this;
+  }
+
+  NotInCheck getNotInCheck() {
+    return notInCheck;
+  }
+
   private void rewriteCorrConjunctForHaving(ASTNode conjunctASTNode,
       boolean refersLeft,
       String outerQueryAlias,
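The NotInCheck comment above compresses the key semantic point. Here is a minimal, self-contained sketch of SQL's three-valued logic (the class and method names are hypothetical, not part of the patch) showing why a single NULL on the right side of NOT IN filters out every row, which is exactly the case the null-count query guards against:

```java
import java.util.Arrays;
import java.util.List;

public class NotInSemanticsDemo {
  // SQL treats "x NOT IN (v1, v2, ...)" as "x <> v1 AND x <> v2 AND ...".
  // A comparison involving NULL is UNKNOWN (modeled as null here), and a
  // WHERE clause keeps only rows whose predicate is TRUE, so one NULL in
  // the set means no row can ever pass the NOT IN filter.
  static Boolean notIn(Integer x, List<Integer> values) {
    Boolean result = Boolean.TRUE;
    for (Integer v : values) {
      Boolean notEqual = (x == null || v == null)
          ? null : Boolean.valueOf(x.intValue() != v.intValue());
      if (Boolean.FALSE.equals(notEqual)) {
        return Boolean.FALSE;          // three-valued AND: FALSE dominates
      }
      if (notEqual == null) {
        result = null;                 // then UNKNOWN, which also fails WHERE
      }
    }
    return result;
  }

  public static void main(String[] args) {
    System.out.println(notIn(3, Arrays.asList(1, 2)));       // true -> row kept
    System.out.println(notIn(3, Arrays.asList(1, null, 2))); // null -> row filtered
  }
}
```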
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index e9d9ee7..29cdd46 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -101,6 +101,7 @@
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
 import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
+import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
@@ -1922,6 +1923,17 @@ private Operator genHavingPlan(String dest, QB qb, Operator input,
     output = putOpInsertMap(output, inputRR);
     return output;
   }
+
+  private Operator genPlanForSubQueryPredicate(
+      QB qbSQ,
+      ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
+    qbSQ.setSubQueryDef(subQueryPredicate.getSubQuery());
+    Phase1Ctx ctx_1 = initPhase1Ctx();
+    doPhase1(subQueryPredicate.getSubQueryAST(), qbSQ, ctx_1);
+    getMetaData(qbSQ);
+    Operator op = genPlan(qbSQ);
+    return op;
+  }
 
   @SuppressWarnings("nls")
   private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input,
@@ -2010,11 +2022,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input,
         subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias);
 
         QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
-        qbSQ.setSubQueryDef(subQuery);
-        Phase1Ctx ctx_1 = initPhase1Ctx();
-        doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1);
-        getMetaData(qbSQ);
-        Operator sqPlanTopOp = genPlan(qbSQ);
+        Operator sqPlanTopOp = genPlanForSubQueryPredicate(qbSQ, subQuery);
         aliasToOpInfo.put(subQuery.getAlias(), sqPlanTopOp);
         RowResolver sqRR = opParseCtx.get(sqPlanTopOp).getRowResolver();
@@ -2029,6 +2037,27 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input,
           throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
               subQueryAST, "SubQuery can contain only 1 item in Select List."));
         }
+
+        /*
+         * If this is a Not In SubQuery Predicate then Join in the Null Check SubQuery.
+         * See QBSubQuery.NotInCheck for details on why and how this is constructed.
+         */
+        if ( subQuery.getNotInCheck() != null ) {
+          QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
+          notInCheck.setSQRR(sqRR);
+          QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
+          Operator sqnicPlanTopOp = genPlanForSubQueryPredicate(qbSQ_nic, notInCheck);
+          aliasToOpInfo.put(notInCheck.getAlias(), sqnicPlanTopOp);
+          QBJoinTree joinTree_nic = genSQJoinTree(qb, notInCheck,
+              input,
+              aliasToOpInfo);
+          pushJoinFilters(qb, joinTree_nic, aliasToOpInfo, false);
+          input = genJoinOperator(qbSQ_nic, joinTree_nic, aliasToOpInfo, input);
+          inputRR = opParseCtx.get(input).getRowResolver();
+          if ( forHavingClause ) {
+            aliasToOpInfo.put(havingInputAlias, input);
+          }
+        }
 
         /*
          * Gen Join between outer Operator and SQ op
@@ -6759,7 +6788,7 @@ private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree,
    * Given this information, once we initialize the QBJoinTree, we call the 'parseJoinCondition'
    * method to validate and parse Join conditions.
    */
-  private QBJoinTree genSQJoinTree(QB qb, QBSubQuery subQuery,
+  private QBJoinTree genSQJoinTree(QB qb, ISubQueryJoinInfo subQuery,
      Operator joiningOp,
      Map<String, Operator> aliasToOpInfo)
      throws SemanticException {
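The SubQueryUtils.java changes that follow assemble the null-check query directly as an AST through ParseDriver.adaptor. For orientation, this hedged sketch renders the same shape as SQL text; the helper and its parameters are illustrative stand-ins, while the "c1" and "_notin_nullcheck" aliases match the constants used in the patch:

```java
import java.util.List;

final class NotInNullCheckSketch {
  // Mirrors the shape that buildNotInNullCheckQuery/buildNotInNullCheckWhere
  // produce as an AST:
  //   select count(*) as c1
  //   from (<original subquery>) <sqAlias>
  //   where <select col> is null [or <corr expr> is null ...]
  static String nullCheckQuery(String subQuerySql, String sqAlias, String cntAlias,
      String selectColAlias, List<String> corrExprs) {
    StringBuilder where =
        new StringBuilder(sqAlias + "." + selectColAlias + " is null");
    for (String corr : corrExprs) {
      where.append(" or ").append(corr).append(" is null"); // orAST(...) in the patch
    }
    return "select count(*) as " + cntAlias
        + " from (" + subQuerySql + ") " + sqAlias
        + " where " + where;
  }
}
```

buildNotInNullJoinCond then produces the joining condition `<subquery alias>_notin_nullcheck.c1 = 0`, so the left semi join the SemanticAnalyzer adds (NotInCheck.getJoinType() returns LEFTSEMI) lets outer rows through only when the null count is zero.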
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
index ddc096d..04f6ee0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
@@ -409,6 +409,124 @@ static void setOriginDeep(ASTNode node, ASTNodeOrigin origin) {
       setOriginDeep((ASTNode)node.getChild(i), origin);
     }
   }
+
+  static ASTNode buildNotInNullCheckQuery(ASTNode subQueryAST,
+      String subQueryAlias,
+      String cntAlias,
+      List<ASTNode> corrExprs,
+      RowResolver sqRR) {
+
+    subQueryAST = (ASTNode) ParseDriver.adaptor.dupTree(subQueryAST);
+    ASTNode qry = (ASTNode)
+        ParseDriver.adaptor.create(HiveParser.TOK_QUERY, "TOK_QUERY");
+
+    qry.addChild(buildNotInNullCheckFrom(subQueryAST, subQueryAlias));
+    ASTNode insertAST = buildNotInNullCheckInsert();
+    qry.addChild(insertAST);
+    insertAST.addChild(buildNotInNullCheckSelect(cntAlias));
+    insertAST.addChild(buildNotInNullCheckWhere(subQueryAST,
+        subQueryAlias, corrExprs, sqRR));
+
+    return qry;
+  }
+
+  static ASTNode buildNotInNullCheckFrom(ASTNode subQueryAST, String subQueryAlias) {
+    ASTNode from = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FROM, "TOK_FROM");
+    ASTNode sqExpr = (ASTNode)
+        ParseDriver.adaptor.create(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY");
+    sqExpr.addChild(subQueryAST);
+    sqExpr.addChild(createAliasAST(subQueryAlias));
+    from.addChild(sqExpr);
+    return from;
+  }
+
+  static ASTNode buildNotInNullCheckInsert() {
+    ASTNode insert = (ASTNode)
+        ParseDriver.adaptor.create(HiveParser.TOK_INSERT, "TOK_INSERT");
+    ASTNode dest = (ASTNode)
+        ParseDriver.adaptor.create(HiveParser.TOK_DESTINATION, "TOK_DESTINATION");
+    ASTNode dir = (ASTNode)
+        ParseDriver.adaptor.create(HiveParser.TOK_DIR, "TOK_DIR");
+    ASTNode tfile = (ASTNode)
+        ParseDriver.adaptor.create(HiveParser.TOK_TMP_FILE, "TOK_TMP_FILE");
+    insert.addChild(dest);
+    dest.addChild(dir);
+    dir.addChild(tfile);
+
+    return insert;
+  }
+
+  static ASTNode buildNotInNullCheckSelect(String cntAlias) {
+    ASTNode select = (ASTNode)
+        ParseDriver.adaptor.create(HiveParser.TOK_SELECT, "TOK_SELECT");
+    ASTNode selectExpr = (ASTNode)
+        ParseDriver.adaptor.create(HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
+    ASTNode countStar = (ASTNode)
+        ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONSTAR, "TOK_FUNCTIONSTAR");
+    ASTNode alias = (createAliasAST(cntAlias));
+
+    countStar.addChild((ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, "count"));
+    select.addChild(selectExpr);
+    selectExpr.addChild(countStar);
+    selectExpr.addChild(alias);
+
+    return select;
+  }
+
+  static ASTNode buildNotInNullCheckWhere(ASTNode subQueryAST,
+      String sqAlias,
+      List<ASTNode> corrExprs,
+      RowResolver sqRR) {
+
+    ASTNode sqSelect = (ASTNode) subQueryAST.getChild(1).getChild(1);
+    ASTNode selExpr = (ASTNode) sqSelect.getChild(0);
+    String colAlias = null;
+
+    if ( selExpr.getChildCount() == 2 ) {
+      colAlias = selExpr.getChild(1).getText();
+    } else if (selExpr.getChild(0).getType() != HiveParser.TOK_ALLCOLREF) {
+      colAlias = sqAlias + "_ninc_col0";
+      selExpr.addChild((ASTNode)ParseDriver.adaptor.create(HiveParser.Identifier, colAlias));
+    } else {
+      List<ColumnInfo> signature = sqRR.getRowSchema().getSignature();
+      ColumnInfo joinColumn = signature.get(0);
+      String[] joinColName =
sqRR.reverseLookup(joinColumn.getInternalName()); + colAlias = joinColName[1]; + } + + ASTNode searchCond = isNull(createColRefAST(sqAlias, colAlias)); + + for(ASTNode e : corrExprs ) { + ASTNode p = (ASTNode) ParseDriver.adaptor.dupTree(e); + p = isNull(p); + searchCond = orAST(searchCond, p); + } + + ASTNode where = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_WHERE, "TOK_WHERE"); + where.addChild(searchCond); + return where; + } + + static ASTNode buildNotInNullJoinCond(String subqueryAlias, String cntAlias) { + + ASTNode eq = (ASTNode) + ParseDriver.adaptor.create(HiveParser.EQUAL, "="); + + eq.addChild(createColRefAST(subqueryAlias, cntAlias)); + eq.addChild((ASTNode) + ParseDriver.adaptor.create(HiveParser.Number, "0")); + + return eq; + } + + public static interface ISubQueryJoinInfo { + public String getAlias(); + public JoinType getJoinType(); + public ASTNode getJoinConditionAST(); + public QBSubQuery getSubQuery(); + public ASTNode getSubQueryAST(); + public String getOuterQueryId(); + }; } diff --git ql/src/test/queries/clientpositive/subquery_notin.q ql/src/test/queries/clientpositive/subquery_notin.q index d7eca3e..d5f6086 100644 --- ql/src/test/queries/clientpositive/subquery_notin.q +++ ql/src/test/queries/clientpositive/subquery_notin.q @@ -127,3 +127,17 @@ from src where not src.key in ( select key from src s1 where s1.key > '2') order by key ; + +-- null check +create view T1_v as +select key from src where key <'11'; + +create view T2_v as +select case when key > '104' then null else key end as key from T1_v; + +explain +select * +from T1_v where T1_v.key not in (select T2_v.key from T2_v); + +select * +from T1_v where T1_v.key not in (select T2_v.key from T2_v); diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out index a917a13..4f142f3 100644 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -55,10 +55,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2, Stage-10 Stage-5 depends on stages: Stage-4 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + Stage-10 is a root stage STAGE PLANS: Stage: Stage-2 @@ -176,18 +178,53 @@ STAGE PLANS: $INTNAME TableScan Reduce Output Operator - key expressions: + sort order: + tag: 0 + value expressions: expr: key type: string + expr: value + type: string + $INTNAME1 + TableScan + Reduce Output Operator + sort order: + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string sort order: + Map-reduce partition columns: - expr: key + expr: _col0 type: string tag: 0 value expressions: - expr: key + expr: _col0 type: 
string - expr: value + expr: _col1 type: string sq_2:s1 TableScan @@ -241,7 +278,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -278,9 +315,63 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Alias -> Map Operator Tree: + sq_2_notin_nullcheck:sq_2:s1 + TableScan + alias: s1 + Filter Operator + predicate: + expr: ((key > '2') and key is null) + type: boolean + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 = 0) + type: boolean + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -314,10 +405,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@src_4 POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -326,10 +417,10 @@ POSTHOOK: query: select * from src_4 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_4 #### A masked pattern was here #### -POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: 
src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] 90 val_90 90 val_90 90 val_90 @@ -349,10 +440,10 @@ POSTHOOK: query: select * from src_5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_5 #### A masked pattern was here #### -POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] 0 val_0 0 val_0 0 val_0 diff --git ql/src/test/results/clientpositive/subquery_notin.q.out ql/src/test/results/clientpositive/subquery_notin.q.out index bf87e3b..497d332 100644 --- ql/src/test/results/clientpositive/subquery_notin.q.out +++ ql/src/test/results/clientpositive/subquery_notin.q.out @@ -107,12 +107,120 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: + sq_1_notin_nullcheck:sq_1:s1 + TableScan + alias: s1 + Filter Operator + predicate: + expr: ((key > '2') and key is null) + type: boolean + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 = 0) + type: boolean + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + sort order: + tag: 1 + src + TableScan + alias: src + Reduce Output Operator + sort order: + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string sq_1:s1 TableScan alias: s1 @@ -137,23 +245,6 @@ STAGE PLANS: value expressions: expr: _col0 type: string - src - TableScan - alias: src - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string Reduce Operator Tree: Join Operator condition map: @@ -344,12 +435,15 @@ ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_name))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL r) 2) (= (. (TOK_TABLE_OR_COL b) p_mfgr) (. (TOK_TABLE_OR_COL a) p_mfgr)))))) (. 
(TOK_TABLE_OR_COL b) p_name)))))) STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: sq_1:a:part @@ -395,50 +489,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: $INTNAME TableScan Reduce Output Operator key expressions: - expr: _col0 - type: string expr: _col1 type: string + expr: _col2 + type: string sort order: ++ Map-reduce partition columns: - expr: _col0 - type: string expr: _col1 type: string - tag: 1 + expr: _col2 + type: string + tag: 0 value expressions: - expr: _col0 + expr: _col1 type: string - b + expr: _col2 + type: string + expr: _col5 + type: int + $INTNAME1 TableScan - alias: b Reduce Output Operator key expressions: - expr: p_name + expr: _col0 type: string - expr: p_mfgr + expr: _col1 type: string sort order: ++ Map-reduce partition columns: - expr: p_name + expr: _col0 type: string - expr: p_mfgr + expr: _col1 type: string - tag: 0 + tag: 1 value expressions: - expr: p_name - type: string - expr: p_mfgr + expr: _col0 type: string - expr: p_size - type: int Reduce Operator Tree: Join Operator condition map: @@ -469,6 +562,132 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + sq_1_notin_nullcheck:sq_1:a:part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_mfgr + type: string + tag: -1 + value expressions: + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Extract + PTF Operator + Filter Operator + predicate: + expr: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) + type: boolean + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 = 0) + type: boolean + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + sort order: + tag: 1 + b + TableScan + alias: b + Reduce Output Operator + sort order: + tag: 0 + value expressions: + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} + 1 + handleSkewJoin: false + outputColumnNames: _col1, _col2, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -538,7 +757,10 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -638,22 +860,21 @@ STAGE PLANS: value expressions: expr: _col0 type: double - part + $INTNAME1 TableScan - alias: part Reduce Output Operator key expressions: - expr: UDFToDouble(p_size) + expr: UDFToDouble(_col5) type: double sort order: + Map-reduce partition columns: - expr: UDFToDouble(p_size) + expr: UDFToDouble(_col5) type: double tag: 0 value expressions: - expr: p_name + expr: _col1 type: string - expr: p_size + expr: _col5 type: int Reduce Operator Tree: Join Operator @@ -683,20 +904,157 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select p_name, p_size -from -part where part.p_size not in - (select avg(p_size) - from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 - ) -order by p_name, p_size -PREHOOK: type: QUERY + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + sq_1_notin_nullcheck:sq_1:a:part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_mfgr + type: string + tag: -1 + value expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Extract + PTF Operator + Filter Operator + predicate: + expr: (_wcol0 <= 2) + type: boolean + Select Operator + expressions: + expr: _col5 + type: int + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: avg(_col0) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: struct + Reduce Operator Tree: + Group By Operator + aggregations: + expr: avg(VALUE._col0) + bucketGroup: false + mode: mergepartial + 
outputColumnNames: _col0 + Filter Operator + predicate: + expr: _col0 is null + type: boolean + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: complete + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 = 0) + type: boolean + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + sort order: + tag: 1 + part + TableScan + alias: part + Reduce Output Operator + sort order: + tag: 0 + value expressions: + expr: p_name + type: string + expr: p_size + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col5} + 1 + handleSkewJoin: false + outputColumnNames: _col1, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +order by p_name, p_size +PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### POSTHOOK: query: select p_name, p_size @@ -756,13 +1114,17 @@ ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_size)))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL r) 2) (= (. (TOK_TABLE_OR_COL b) p_mfgr) (. (TOK_TABLE_OR_COL a) p_mfgr)))))) (. 
(TOK_TABLE_OR_COL b) p_size)))))) STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-5 is a root stage + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: sq_1:a:part @@ -815,7 +1177,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -857,49 +1219,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: $INTNAME TableScan Reduce Output Operator key expressions: - expr: _col0 + expr: _col5 type: int - expr: _col1 + expr: _col2 type: string sort order: ++ Map-reduce partition columns: - expr: _col0 + expr: _col5 type: int - expr: _col1 + expr: _col2 type: string - tag: 1 + tag: 0 value expressions: - expr: _col0 + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col5 type: int - b + $INTNAME1 TableScan - alias: b Reduce Output Operator key expressions: - expr: p_size + expr: _col0 type: int - expr: p_mfgr + expr: _col1 type: string sort order: ++ Map-reduce partition columns: - expr: p_size + expr: _col0 type: int - expr: p_mfgr + expr: _col1 type: string - tag: 0 + tag: 1 value expressions: - expr: p_name - type: string - expr: p_mfgr - type: string - expr: p_size + expr: _col0 type: int Reduce Operator Tree: Join Operator @@ -931,6 +1292,185 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + sq_1_notin_nullcheck:sq_1:a:part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_mfgr + type: string + tag: -1 + value expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Extract + PTF Operator + Filter Operator + predicate: + expr: (_wcol0 <= 2) + type: boolean + Select Operator + expressions: + expr: _col2 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: + expr: min(_col1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: int + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + bucketGroup: 
false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (_col1 is null or _col0 is null) + type: boolean + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 = 0) + type: boolean + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + sort order: + tag: 1 + b + TableScan + alias: b + Reduce Output Operator + sort order: + tag: 0 + value expressions: + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} + 1 + handleSkewJoin: false + outputColumnNames: _col1, _col2, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1145,3 +1685,231 @@ POSTHOOK: Input: default@src 199 val_199 199 val_199 2 val_2 +PREHOOK: query: -- null check +create view T1_v as +select key from src where key <'11' +PREHOOK: type: CREATEVIEW +POSTHOOK: query: -- null check +create view T1_v as +select key from src where key <'11' +POSTHOOK: type: CREATEVIEW +POSTHOOK: Output: default@T1_v +PREHOOK: query: create view T2_v as +select case when key > '104' then null else key end as key from T1_v +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@t1_v +POSTHOOK: query: create view T2_v as +select case when key > '104' then null else key end as key from T1_v +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@t1_v +POSTHOOK: Output: default@T2_v +PREHOOK: query: explain +select * +from T1_v where T1_v.key not in (select T2_v.key from T2_v) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from T1_v where T1_v.key not in (select T2_v.key from T2_v) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1_v))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) 
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2_v))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL T2_v) key))))) (. (TOK_TABLE_OR_COL T1_v) key)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + sq_1_notin_nullcheck:sq_1:t2_v:t1_v:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < '11') + type: boolean + Select Operator + expressions: + expr: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END + type: string + outputColumnNames: _col0 + Filter Operator + predicate: + expr: _col0 is null + type: boolean + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 = 0) + type: boolean + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + sort order: + tag: 1 + t1_v:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < '11') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: 0 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + sq_1:t2_v:t1_v:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < '11') + type: boolean + Select Operator + expressions: + expr: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter 
Operator + predicate: + expr: ((1 = 1) and _col1 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * +from T1_v where T1_v.key not in (select T2_v.key from T2_v) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@t1_v +PREHOOK: Input: default@t2_v +#### A masked pattern was here #### +POSTHOOK: query: select * +from T1_v where T1_v.key not in (select T2_v.key from T2_v) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@t1_v +POSTHOOK: Input: default@t2_v +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.out ql/src/test/results/clientpositive/subquery_notin_having.q.out index f9598c2..1b15427 100644 --- ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -59,52 +59,93 @@ ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '12')))) (TOK_TABLE_OR_COL key)))))) STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2 + Stage-5 is a root stage Stage-0 is a root stage STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: - src + sq_1_notin_nullcheck:sq_1:s1 TableScan - alias: src + alias: s1 + Filter Operator + predicate: + expr: ((key > '12') and key is null) + type: boolean + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 = 0) + type: boolean Select Operator expressions: - expr: key - type: string - outputColumnNames: key + expr: _col0 + type: bigint + outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() bucketGroup: false keys: - expr: key - type: string + expr: _col0 + type: bigint mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + sort order: + tag: 1 + $INTNAME1 + TableScan + Reduce Output Operator + sort order: + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false outputColumnNames: _col0, _col1 File Output Operator compressed: false @@ -114,7 +155,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: $INTNAME @@ -185,6 +226,56 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -217,8 +308,11 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-3 depends on stages: Stage-2, Stage-4 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-6 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -288,16 +382,53 @@ STAGE PLANS: $INTNAME TableScan Reduce Output Operator - key expressions: + sort order: + tag: 0 + value expressions: expr: _col0 type: string expr: _col1 type: double + expr: _col1 + type: double + $INTNAME1 + TableScan + Reduce Output Operator + sort order: + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col5 + type: 
double sort order: ++ Map-reduce partition columns: expr: _col0 type: string - expr: _col1 + expr: _col5 type: double tag: 0 value expressions: @@ -351,7 +482,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: sq_1:a:part @@ -422,6 +553,119 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + sq_1_notin_nullcheck:sq_1:a:part + TableScan + alias: part + Select Operator + expressions: + expr: p_mfgr + type: string + expr: p_retailprice + type: double + outputColumnNames: p_mfgr, p_retailprice + Group By Operator + aggregations: + expr: min(p_retailprice) + expr: max(p_retailprice) + expr: avg(p_retailprice) + bucketGroup: false + keys: + expr: p_mfgr + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + expr: _col2 + type: double + expr: _col3 + type: struct + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + expr: max(VALUE._col1) + expr: avg(VALUE._col2) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Filter Operator + predicate: + expr: (((_col2 - _col1) > 600) and (_col0 is null or _col1 is null)) + type: boolean + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 = 0) + type: boolean + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -480,8 +724,11 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-6 is a root stage Stage-0 is a root stage STAGE PLANS: @@ -547,6 +794,41 @@ STAGE PLANS: $INTNAME TableScan Reduce Output Operator + sort order: + 
tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: double + $INTNAME1 + TableScan + Reduce Output Operator + sort order: + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator key expressions: expr: _col0 type: string @@ -602,7 +884,120 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + sq_1_notin_nullcheck:sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: p_mfgr is null + type: boolean + Select Operator + expressions: + expr: p_mfgr + type: string + expr: p_retailprice + type: double + outputColumnNames: p_mfgr, p_retailprice + Group By Operator + aggregations: + expr: max(p_retailprice) + expr: min(p_retailprice) + bucketGroup: false + keys: + expr: p_mfgr + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: max(VALUE._col0) + expr: min(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: + expr: (((_col1 - _col2) > 600) and _col0 is null) + type: boolean + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 = 0) + type: boolean + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: sq_1:a