diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 249728f..c545785 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12184,14 +12184,149 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) return filterRel; } - private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel) + private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, + Map aliasToRel, boolean forHavingClause) + throws SemanticException { + /* + * Handle Subquery predicates. + * + */ + List subQueriesInOriginalTree = SubQueryUtils + .findSubQueries(searchCond); + if (subQueriesInOriginalTree.size() > 0) { + + /* + * Restriction.9.m :: disallow nested SubQuery expressions. + */ + if (qb.getSubQueryPredicateDef() != null) { + throw new SemanticException( + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(0), + "Nested SubQuery expressions are not supported.")); + } + + /* + * Restriction.8.m :: We allow only 1 SubQuery expression per Query. + */ + if (subQueriesInOriginalTree.size() > 1) { + + throw new SemanticException( + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(1), + "Only 1 SubQuery expression is supported.")); + } + + /* + * Clone the Search AST; apply all rewrites on the clone. + */ + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor + .dupTree(searchCond); + List subQueries = SubQueryUtils + .findSubQueries(clonedSearchCond); + + RowResolver inputRR = m_relToHiveRR.get(srcRel); + + for (int i = 0; i < subQueries.size(); i++) { + ASTNode subQueryAST = subQueries.get(i); + ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); + + int sqIdx = qb.incrNumSubQueryPredicates(); + clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere( + clonedSearchCond, subQueryAST); + + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, + subQueryAST, originalSubQueryAST, ctx); + + if (!forHavingClause) { + qb.setWhereClauseSubQueryPredicate(subQuery); + } else { + qb.setHavingClauseSubQueryPredicate(subQuery); + } + String havingInputAlias = null; + + if (forHavingClause) { + havingInputAlias = "gby_sq" + sqIdx; + aliasToRel.put(havingInputAlias, srcRel); + } + + subQuery.validateAndRewriteAST(inputRR, forHavingClause, + havingInputAlias, aliasToRel.keySet()); + + QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), + true); + qbSQ.setSubQueryDef(subQuery.getSubQuery()); + Phase1Ctx ctx_1 = initPhase1Ctx(); + doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1); + getMetaData(qbSQ); + RelNode subQueryRelNode = genLogicalPlan(qbSQ); + aliasToRel.put(subQuery.getAlias(), subQueryRelNode); + RowResolver sqRR = m_relToHiveRR.get(subQueryRelNode); + + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only contain one select item. + */ + if (subQuery.getOperator().getType() != SubQueryType.EXISTS + && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS + && sqRR.getColumnInfos().size() + - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { + throw new SemanticException( + ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, + "SubQuery can contain only 1 item in Select List.")); + } + + /* + * If this is a Not In SubQuery Predicate then Join in the Null Check + * SubQuery. See QBSubQuery.NotInCheck for details on why and how this + * is constructed. + */ + if (subQuery.getNotInCheck() != null) { + QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); + notInCheck.setSQRR(sqRR); + QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), + notInCheck.getAlias(), true); + qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); + ctx_1 = initPhase1Ctx(); + doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1); + getMetaData(qbSQ_nic); + RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic); + aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); + srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, + // set explicitly to inner until we figure out SemiJoin use + // notInCheck.getJoinType(), + JoinType.INNER, notInCheck.getJoinConditionAST()); + inputRR = m_relToHiveRR.get(srcRel); + if (forHavingClause) { + aliasToRel.put(havingInputAlias, srcRel); + } + } + + /* + * Gen Join between outer Operator and SQ op + */ + subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, + havingInputAlias); + srcRel = genJoinRelNode(srcRel, subQueryRelNode, + subQuery.getJoinType(), subQuery.getJoinConditionAST()); + searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); + + } + } + + return genFilterRelNode(searchCond, srcRel); + } + + private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, + Map aliasToRel, + boolean forHavingClause) throws SemanticException { RelNode filterRel = null; Iterator whereClauseIterator = getQBParseInfo(qb) .getDestToWhereExpr().values().iterator(); if (whereClauseIterator.hasNext()) { - filterRel = genFilterRelNode((ASTNode) whereClauseIterator.next().getChild(0), srcRel); + filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), + srcRel, aliasToRel, forHavingClause); } return filterRel; @@ -13079,7 +13214,7 @@ private RelNode genLogicalPlan(QB qb) throws SemanticException { } // 2. Build Rel for where Clause - filterRel = genFilterLogicalPlan(qb, srcRel); + filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false); srcRel = (filterRel == null) ? srcRel : filterRel; // 3. Build Rel for GB Clause @@ -13087,7 +13222,7 @@ private RelNode genLogicalPlan(QB qb) throws SemanticException { srcRel = (gbRel == null) ? srcRel : gbRel; // 4. Build Rel for GB Having Clause - gbHavingRel = genGBHavingLogicalPlan(qb, srcRel); + gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel); srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel; // 5. Build Rel for Select Clause @@ -13132,7 +13267,8 @@ private RelNode genLogicalPlan(QB qb) throws SemanticException { return srcRel; } - private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) + private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, + Map aliasToRel) throws SemanticException { RelNode gbFilter = null; QBParseInfo qbp = getQBParseInfo(qb); @@ -13140,7 +13276,7 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) .iterator().next()); if (havingClause != null) - gbFilter = genFilterRelNode((ASTNode) havingClause.getChild(0), srcRel); + gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel, true); return gbFilter; } diff --git ql/src/test/queries/clientpositive/cbo_correctness.q ql/src/test/queries/clientpositive/cbo_correctness.q index d1a5648..b904869 100644 --- ql/src/test/queries/clientpositive/cbo_correctness.q +++ ql/src/test/queries/clientpositive/cbo_correctness.q @@ -12,6 +12,44 @@ load data local inpath '../../data/files/cbo_t1.txt' into table t1 partition (dt load data local inpath '../../data/files/cbo_t2.txt' into table t2 partition (dt='2014'); load data local inpath '../../data/files/cbo_t3.txt' into table t3; +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + +LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part; + +DROP TABLE lineitem; +CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem; + +create table src_cbo as select * from src; + set hive.stats.dbclass=jdbc:derby; analyze table t1 partition (dt) compute statistics; analyze table t1 compute statistics for columns key, value, c_int, c_float, c_boolean; @@ -19,6 +57,12 @@ analyze table t2 partition (dt) compute statistics; analyze table t2 compute statistics for columns key, value, c_int, c_float, c_boolean; analyze table t3 compute statistics; analyze table t3 compute statistics for columns key, value, c_int, c_float, c_boolean; +analyze table src_cbo compute statistics; +analyze table src_cbo compute statistics for columns; +analyze table part compute statistics; +analyze table part compute statistics for columns; +analyze table lineitem compute statistics; +analyze table lineitem compute statistics for columns; set hive.stats.fetch.column.stats=true; set hive.auto.convert.join=false; @@ -212,3 +256,93 @@ drop view v1; drop view v2; drop view v3; drop view v4; + + +-- subquery tests +-- not in +-- non agg, non corr +explain +select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) +; +select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) +; + +-- non agg, corr +explain +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +; +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +; + +-- agg, non corr +explain +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) +; +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) +; + +-- agg, corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size from part) a + where p_size < 10 and b.p_mfgr = a.p_mfgr + ) +; +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size from part) a + where p_size < 10 and b.p_mfgr = a.p_mfgr + ) +; + +-- non agg, non corr, Group By in Parent Query +explain +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +; +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +; \ No newline at end of file diff --git ql/src/test/results/clientpositive/cbo_correctness.q.out ql/src/test/results/clientpositive/cbo_correctness.q.out index 9391828..7b8b421 100644 --- ql/src/test/results/clientpositive/cbo_correctness.q.out +++ ql/src/test/results/clientpositive/cbo_correctness.q.out @@ -57,6 +57,101 @@ POSTHOOK: query: load data local inpath '../../data/files/cbo_t3.txt' into table POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@t3 +PREHOOK: query: CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE lineitem +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE lineitem +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem +PREHOOK: query: create table src_cbo as select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table src_cbo as select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_cbo PREHOOK: query: analyze table t1 partition (dt) compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -117,6 +212,54 @@ POSTHOOK: query: analyze table t3 compute statistics for columns key, value, c_i POSTHOOK: type: QUERY POSTHOOK: Input: default@t3 #### A masked pattern was here #### +PREHOOK: query: analyze table src_cbo compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +PREHOOK: Output: default@src_cbo +POSTHOOK: query: analyze table src_cbo compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +POSTHOOK: Output: default@src_cbo +PREHOOK: query: analyze table src_cbo compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: analyze table src_cbo compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +PREHOOK: query: analyze table part compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Output: default@part +POSTHOOK: query: analyze table part compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Output: default@part +PREHOOK: query: analyze table part compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: analyze table part compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +PREHOOK: query: analyze table lineitem compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +PREHOOK: Output: default@lineitem +POSTHOOK: query: analyze table lineitem compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +POSTHOOK: Output: default@lineitem +PREHOOK: query: analyze table lineitem compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: analyze table lineitem compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### PREHOOK: query: -- 1. Test Select + TS select * from t1 PREHOOK: type: QUERY @@ -16420,3 +16563,1227 @@ POSTHOOK: query: drop view v4 POSTHOOK: type: DROPVIEW POSTHOOK: Input: default@v4 POSTHOOK: Output: default@v4 +Warning: Shuffle Join JOIN[24][tables = [$hdt$_382, $hdt$_387]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: -- subquery tests +-- not in +-- non agg, non corr +explain +select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- subquery tests +-- not in +-- non agg, non corr +explain +select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_cbo + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: bigint) + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 601 Data size: 111786 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 300 Data size: 55800 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 300 Data size: 29700 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 300 Data size: 29700 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[24][tables = [$hdt$_392, $hdt$_397]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +0 val_0 0 NULL +0 val_0 0 NULL +0 val_0 0 NULL +10 val_10 0 NULL +100 val_100 0 NULL +100 val_100 0 NULL +103 val_103 0 NULL +103 val_103 0 NULL +104 val_104 0 NULL +104 val_104 0 NULL +105 val_105 0 NULL +11 val_11 0 NULL +111 val_111 0 NULL +113 val_113 0 NULL +113 val_113 0 NULL +114 val_114 0 NULL +116 val_116 0 NULL +118 val_118 0 NULL +118 val_118 0 NULL +119 val_119 0 NULL +119 val_119 0 NULL +119 val_119 0 NULL +12 val_12 0 NULL +12 val_12 0 NULL +120 val_120 0 NULL +120 val_120 0 NULL +125 val_125 0 NULL +125 val_125 0 NULL +126 val_126 0 NULL +128 val_128 0 NULL +128 val_128 0 NULL +128 val_128 0 NULL +129 val_129 0 NULL +129 val_129 0 NULL +131 val_131 0 NULL +133 val_133 0 NULL +134 val_134 0 NULL +134 val_134 0 NULL +136 val_136 0 NULL +137 val_137 0 NULL +137 val_137 0 NULL +138 val_138 0 NULL +138 val_138 0 NULL +138 val_138 0 NULL +138 val_138 0 NULL +143 val_143 0 NULL +145 val_145 0 NULL +146 val_146 0 NULL +146 val_146 0 NULL +149 val_149 0 NULL +149 val_149 0 NULL +15 val_15 0 NULL +15 val_15 0 NULL +150 val_150 0 NULL +152 val_152 0 NULL +152 val_152 0 NULL +153 val_153 0 NULL +155 val_155 0 NULL +156 val_156 0 NULL +157 val_157 0 NULL +158 val_158 0 NULL +160 val_160 0 NULL +162 val_162 0 NULL +163 val_163 0 NULL +164 val_164 0 NULL +164 val_164 0 NULL +165 val_165 0 NULL +165 val_165 0 NULL +166 val_166 0 NULL +167 val_167 0 NULL +167 val_167 0 NULL +167 val_167 0 NULL +168 val_168 0 NULL +169 val_169 0 NULL +169 val_169 0 NULL +169 val_169 0 NULL +169 val_169 0 NULL +17 val_17 0 NULL +170 val_170 0 NULL +172 val_172 0 NULL +172 val_172 0 NULL +174 val_174 0 NULL +174 val_174 0 NULL +175 val_175 0 NULL +175 val_175 0 NULL +176 val_176 0 NULL +176 val_176 0 NULL +177 val_177 0 NULL +178 val_178 0 NULL +179 val_179 0 NULL +179 val_179 0 NULL +18 val_18 0 NULL +18 val_18 0 NULL +180 val_180 0 NULL +181 val_181 0 NULL +183 val_183 0 NULL +186 val_186 0 NULL +187 val_187 0 NULL +187 val_187 0 NULL +187 val_187 0 NULL +189 val_189 0 NULL +19 val_19 0 NULL +190 val_190 0 NULL +191 val_191 0 NULL +191 val_191 0 NULL +192 val_192 0 NULL +193 val_193 0 NULL +193 val_193 0 NULL +193 val_193 0 NULL +194 val_194 0 NULL +195 val_195 0 NULL +195 val_195 0 NULL +196 val_196 0 NULL +197 val_197 0 NULL +197 val_197 0 NULL +199 val_199 0 NULL +199 val_199 0 NULL +199 val_199 0 NULL +2 val_2 0 NULL +Warning: Shuffle Join JOIN[26][tables = [$hdt$_402, $hdt$_408]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- non agg, corr +explain +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr +explain +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p_size < 10) and (p_name is null or p_mfgr is null)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 231 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 231 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 6377 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 28 Data size: 6377 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_size < 10) (type: boolean) + Statistics: Num rows: 8 Data size: 1784 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_mfgr (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 30 Data size: 7014 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col4 is null (type: boolean) + Statistics: Num rows: 15 Data size: 3507 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3507 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 3507 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[26][tables = [$hdt$_414, $hdt$_420]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#5 almond antique blue firebrick mint 31 +Manufacturer#3 almond antique chartreuse khaki white 17 +Manufacturer#1 almond antique chartreuse lavender yellow 34 +Manufacturer#3 almond antique forest lavender goldenrod 14 +Manufacturer#4 almond antique gainsboro frosted violet 10 +Manufacturer#3 almond antique metallic orange dim 19 +Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#2 almond antique violet chocolate turquoise 14 +Manufacturer#4 almond antique violet mint lemon 39 +Manufacturer#2 almond antique violet turquoise frosted 40 +Manufacturer#1 almond aquamarine burnished black steel 28 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 +Manufacturer#4 almond aquamarine floral ivory bisque 27 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#2 almond aquamarine rose maroon antique 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#4 almond azure aquamarine papaya violet 12 +Manufacturer#5 almond azure blanched chiffon midnight 23 +Warning: Shuffle Join JOIN[39][tables = [$hdt$_426, $hdt$_430, $hdt$_437]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- agg, non corr +explain +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, non corr +explain +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-5 + Stage-5 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_size < 10) (type: boolean) + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_size (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: avg(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_size < 10) (type: boolean) + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_size (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: avg(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[39][tables = [$hdt$_440, $hdt$_444, $hdt$_451]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond aquamarine dodger light gainsboro 46 +almond antique olive coral navajo 45 +almond aquamarine pink moccasin thistle 42 +almond antique violet turquoise frosted 40 +almond antique violet mint lemon 39 +almond antique chartreuse lavender yellow 34 +almond antique blue firebrick mint 31 +almond aquamarine burnished black steel 28 +almond aquamarine floral ivory bisque 27 +almond aquamarine rose maroon antique 25 +almond azure blanched chiffon midnight 23 +almond antique metallic orange dim 19 +almond aquamarine sandy cyan gainsboro 18 +almond antique chartreuse khaki white 17 +almond antique violet chocolate turquoise 14 +almond antique forest lavender goldenrod 14 +almond azure aquamarine papaya violet 12 +almond antique gainsboro frosted violet 10 +almond aquamarine yellow dodger mint 7 +almond antique salmon chartreuse burlywood 6 +almond antique medium spring khaki 6 +almond antique burnished rose metallic 2 +almond aquamarine midnight light salmon 2 +almond antique sky peru orange 2 +almond antique burnished rose metallic 2 +almond antique misty red olive 1 +Warning: Shuffle Join JOIN[36][tables = [$hdt$_454, $hdt$_461]] in Stage 'Stage-5:MAPRED' is a cross product +PREHOOK: query: -- agg, corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size from part) a + where p_size < 10 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size from part) a + where p_size < 10 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_size < 10) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 6377 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 30 Data size: 7014 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col4 is null (type: boolean) + Statistics: Num rows: 15 Data size: 3507 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3507 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 3507 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_size < 10) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 is null or _col0 is null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 6377 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[36][tables = [$hdt$_468, $hdt$_475]] in Stage 'Stage-5:MAPRED' is a cross product +PREHOOK: query: select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size from part) a + where p_size < 10 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size from part) a + where p_size < 10 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique salmon chartreuse burlywood 6 +Manufacturer#5 almond antique medium spring khaki 6 +Manufacturer#4 almond antique gainsboro frosted violet 10 +Manufacturer#4 almond azure aquamarine papaya violet 12 +Manufacturer#2 almond antique violet chocolate turquoise 14 +Manufacturer#3 almond antique forest lavender goldenrod 14 +Manufacturer#3 almond antique chartreuse khaki white 17 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#3 almond antique metallic orange dim 19 +Manufacturer#5 almond azure blanched chiffon midnight 23 +Manufacturer#2 almond aquamarine rose maroon antique 25 +Manufacturer#4 almond aquamarine floral ivory bisque 27 +Manufacturer#1 almond aquamarine burnished black steel 28 +Manufacturer#5 almond antique blue firebrick mint 31 +Manufacturer#1 almond antique chartreuse lavender yellow 34 +Manufacturer#4 almond antique violet mint lemon 39 +Manufacturer#2 almond antique violet turquoise frosted 40 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 +Warning: Shuffle Join JOIN[24][tables = [$hdt$_482, $hdt$_487]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- non agg, non corr, Group By in Parent Query +explain +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr, Group By in Parent Query +explain +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((l_shipmode = 'AIR') and l_orderkey is null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: li + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (l_linenumber = 1) (type: boolean) + Statistics: Num rows: 16 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (l_shipmode = 'AIR') (type: boolean) + Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col1, _col4 + Statistics: Num rows: 18 Data size: 154 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col4 is null (type: boolean) + Statistics: Num rows: 9 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 77 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 77 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[24][tables = [$hdt$_493, $hdt$_498]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +450 1 +7068 1 +21636 1 +22630 1 +59694 1 +61931 1 +85951 1 +88035 1 +88362 1 +106170 1 +119477 1 +119767 1 +123076 1 +139636 1 +175839 1 +182052 1