diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java index 3fb928c..fda53c1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java @@ -131,8 +131,8 @@ public static RexNode projectNonColumnEquiConditions(ProjectFactory factory, for (i = 0; i < origColEqConds.size(); i++) { Pair p = origColEqConds.get(i); - RexNode leftKey = leftJoinKeys.get(p.left); - RexNode rightKey = rightJoinKeys.get(p.right); + RexNode leftKey = leftJoinKeys.get(i); + RexNode rightKey = rightJoinKeys.get(i); leftKeys.add(p.left); rightKeys.add(p.right); RexNode cond = rexBuilder.makeCall( diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java index 0faefc6..6b38df4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java @@ -355,12 +355,16 @@ public void setExpressionMap(Map expressionMap) { // TODO: 1) How to handle collisions? 2) Should we be cloning ColumnInfo or // not? public static int add(RowResolver rrToAddTo, RowResolver rrToAddFrom, - int outputColPos) throws SemanticException { + int outputColPos, int numColumns) throws SemanticException { String tabAlias; String colAlias; String[] qualifiedColName; + int i = 0; for (ColumnInfo cInfoFrmInput : rrToAddFrom.getRowSchema().getSignature()) { + if ( numColumns >= 0 && i == numColumns ) { + break; + } ColumnInfo newCI = null; qualifiedColName = rrToAddFrom.getInvRslvMap().get( cInfoFrmInput.getInternalName()); @@ -377,11 +381,25 @@ public static int add(RowResolver rrToAddTo, RowResolver rrToAddFrom, throw new RuntimeException("Ambigous Column Names"); rrToAddTo.put(tabAlias, colAlias, newCI); + + qualifiedColName = rrToAddFrom.getAlternateMappings(cInfoFrmInput + .getInternalName()); + if (qualifiedColName != null) { + tabAlias = qualifiedColName[0]; + colAlias = qualifiedColName[1]; + rrToAddTo.put(tabAlias, colAlias, newCI); + } + i++; } return outputColPos; } + public static int add(RowResolver rrToAddTo, RowResolver rrToAddFrom, + int outputColPos) throws SemanticException { + return add(rrToAddTo, rrToAddFrom, outputColPos, -1); + } + /** * Return a new row resolver that is combination of left RR and right RR. * The schema will be schema of left, schema of right diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index dc13a22..f1441b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12374,14 +12374,197 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) return filterRel; } - private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel) + private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, + Map aliasToRel, boolean forHavingClause) + throws SemanticException { + /* + * Handle Subquery predicates. + * + * Notes (8/22/14 hb): + * Why is this a copy of the code from {@link #genFilterPlan} + * - for now we will support the same behavior as non CBO route. + * - but plan to allow nested SubQueries(Restriction.9.m) and + * multiple SubQuery expressions(Restriction.8.m). 
This + * requires use to utilize Optiq's Decorrelation mechanics, + * and for Optiq to fix/flush out Null semantics(OPTIQ-373) + * - besides only the driving code has been copied. Most of + * the code which is SubQueryUtils and QBSubQuery is reused. + * + */ + int numSrcColumns = srcRel.getRowType().getFieldCount(); + List subQueriesInOriginalTree = SubQueryUtils + .findSubQueries(searchCond); + if (subQueriesInOriginalTree.size() > 0) { + + /* + * Restriction.9.m :: disallow nested SubQuery expressions. + */ + if (qb.getSubQueryPredicateDef() != null) { + throw new SemanticException( + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(0), + "Nested SubQuery expressions are not supported.")); + } + + /* + * Restriction.8.m :: We allow only 1 SubQuery expression per Query. + */ + if (subQueriesInOriginalTree.size() > 1) { + + throw new SemanticException( + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(1), + "Only 1 SubQuery expression is supported.")); + } + + /* + * Clone the Search AST; apply all rewrites on the clone. + */ + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor + .dupTree(searchCond); + List subQueries = SubQueryUtils + .findSubQueries(clonedSearchCond); + + RowResolver inputRR = m_relToHiveRR.get(srcRel); + + for (int i = 0; i < subQueries.size(); i++) { + ASTNode subQueryAST = subQueries.get(i); + ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); + + int sqIdx = qb.incrNumSubQueryPredicates(); + clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere( + clonedSearchCond, subQueryAST); + + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, + subQueryAST, originalSubQueryAST, ctx); + + if (!forHavingClause) { + qb.setWhereClauseSubQueryPredicate(subQuery); + } else { + qb.setHavingClauseSubQueryPredicate(subQuery); + } + String havingInputAlias = null; + + if (forHavingClause) { + havingInputAlias = "gby_sq" + sqIdx; + aliasToRel.put(havingInputAlias, srcRel); + } + + subQuery.validateAndRewriteAST(inputRR, forHavingClause, + havingInputAlias, aliasToRel.keySet()); + + QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), + true); + qbSQ.setSubQueryDef(subQuery.getSubQuery()); + Phase1Ctx ctx_1 = initPhase1Ctx(); + doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1); + getMetaData(qbSQ); + RelNode subQueryRelNode = genLogicalPlan(qbSQ); + aliasToRel.put(subQuery.getAlias(), subQueryRelNode); + RowResolver sqRR = m_relToHiveRR.get(subQueryRelNode); + + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only contain one select item. + */ + if (subQuery.getOperator().getType() != SubQueryType.EXISTS + && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS + && sqRR.getColumnInfos().size() + - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { + throw new SemanticException( + ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, + "SubQuery can contain only 1 item in Select List.")); + } + + /* + * If this is a Not In SubQuery Predicate then Join in the Null Check + * SubQuery. See QBSubQuery.NotInCheck for details on why and how this + * is constructed. 
+ */ + if (subQuery.getNotInCheck() != null) { + QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); + notInCheck.setSQRR(sqRR); + QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), + notInCheck.getAlias(), true); + qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); + ctx_1 = initPhase1Ctx(); + doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1); + getMetaData(qbSQ_nic); + RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic); + aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); + srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, + // set explicitly to inner until we figure out SemiJoin use + // notInCheck.getJoinType(), + JoinType.INNER, notInCheck.getJoinConditionAST()); + inputRR = m_relToHiveRR.get(srcRel); + if (forHavingClause) { + aliasToRel.put(havingInputAlias, srcRel); + } + } + + /* + * Gen Join between outer Operator and SQ op + */ + subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, + havingInputAlias); + srcRel = genJoinRelNode(srcRel, subQueryRelNode, + subQuery.getJoinType(), subQuery.getJoinConditionAST()); + searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); + + srcRel = genFilterRelNode(searchCond, srcRel); + + /* + * For Not Exists and Not In, add a projection on top of the Left + * Outer Join. + */ + if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS + || subQuery.getOperator().getType() != SubQueryType.NOT_IN) { + srcRel = projectLeftOuterSide(srcRel, numSrcColumns); + } + } + return srcRel; + } + + return genFilterRelNode(searchCond, srcRel); + } + + private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) + throws SemanticException { + RowResolver iRR = m_relToHiveRR.get(srcRel); + RowResolver oRR = new RowResolver(); + RowResolver.add(oRR, iRR, 0, numColumns); + + List optiqColLst = new ArrayList(); + List oFieldNames = new ArrayList(); + RelDataType iType = srcRel.getRowType(); + + for (int i = 0; i < iType.getFieldCount(); i++) { + RelDataTypeField fType = iType.getFieldList().get(i); + String fName = iType.getFieldNames().get(i); + optiqColLst.add(m_cluster.getRexBuilder().makeInputRef(fType.getType(), + i)); + oFieldNames.add(fName); + } + + HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, oFieldNames); + + this.m_relToHiveColNameOptiqPosMap.put(selRel, + buildHiveToOptiqColumnMap(oRR, selRel)); + this.m_relToHiveRR.put(selRel, oRR); + return selRel; + } + + private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, + Map aliasToRel, + boolean forHavingClause) throws SemanticException { RelNode filterRel = null; Iterator whereClauseIterator = getQBParseInfo(qb) .getDestToWhereExpr().values().iterator(); if (whereClauseIterator.hasNext()) { - filterRel = genFilterRelNode((ASTNode) whereClauseIterator.next().getChild(0), srcRel); + filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), + srcRel, aliasToRel, forHavingClause); } return filterRel; @@ -13263,7 +13446,7 @@ private RelNode genLogicalPlan(QB qb) throws SemanticException { } // 2. Build Rel for where Clause - filterRel = genFilterLogicalPlan(qb, srcRel); + filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false); srcRel = (filterRel == null) ? srcRel : filterRel; // 3. Build Rel for GB Clause @@ -13271,7 +13454,7 @@ private RelNode genLogicalPlan(QB qb) throws SemanticException { srcRel = (gbRel == null) ? srcRel : gbRel; // 4. 
Build Rel for GB Having Clause - gbHavingRel = genGBHavingLogicalPlan(qb, srcRel); + gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel); srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel; // 5. Build Rel for Select Clause @@ -13316,7 +13499,8 @@ private RelNode genLogicalPlan(QB qb) throws SemanticException { return srcRel; } - private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) + private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, + Map aliasToRel) throws SemanticException { RelNode gbFilter = null; QBParseInfo qbp = getQBParseInfo(qb); @@ -13324,7 +13508,7 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) .iterator().next()); if (havingClause != null) - gbFilter = genFilterRelNode((ASTNode) havingClause.getChild(0), srcRel); + gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel, true); return gbFilter; } diff --git ql/src/test/queries/clientpositive/cbo_correctness.q ql/src/test/queries/clientpositive/cbo_correctness.q index 8cc5045..d4e069f 100644 --- ql/src/test/queries/clientpositive/cbo_correctness.q +++ ql/src/test/queries/clientpositive/cbo_correctness.q @@ -12,6 +12,45 @@ load data local inpath '../../data/files/cbo_t1.txt' into table t1 partition (dt load data local inpath '../../data/files/cbo_t2.txt' into table t2 partition (dt='2014'); load data local inpath '../../data/files/cbo_t3.txt' into table t3; +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + +LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part; + +DROP TABLE lineitem; +CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem; + +create table src_cbo as select * from src; + + set hive.stats.dbclass=jdbc:derby; analyze table t1 partition (dt) compute statistics; analyze table t1 compute statistics for columns key, value, c_int, c_float, c_boolean; @@ -19,6 +58,12 @@ analyze table t2 partition (dt) compute statistics; analyze table t2 compute statistics for columns key, value, c_int, c_float, c_boolean; analyze table t3 compute statistics; analyze table t3 compute statistics for columns key, value, c_int, c_float, c_boolean; +analyze table src_cbo compute statistics; +analyze table src_cbo compute statistics for columns; +analyze table part compute statistics; +analyze table part compute statistics for columns; +analyze table lineitem compute statistics; +analyze table lineitem compute statistics for columns; set hive.stats.fetch.column.stats=true; set hive.auto.convert.join=false; @@ -223,3 +268,185 @@ select key from t1 where c_int = -6 or c_int = +6; -- 15. query referencing only partition columns select count(t1.dt) from t1 join t2 on t1.dt = t2.dt where t1.dt = '2014' ; + +-- 16. 
SubQueries Not In +-- non agg, non corr +select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) +; + +-- non agg, corr +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +; + +-- agg, non corr +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) +; + +-- agg, corr +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size from part) a + where p_size < 10 and b.p_mfgr = a.p_mfgr + ) +; + +-- non agg, non corr, Group By in Parent Query +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +; + +-- add null check test from sq_notin.q once HIVE-7721 resolved. + +-- non agg, corr, having +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +; + +-- agg, non corr, having +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +; + +-- 17. SubQueries In +-- non agg, non corr +select * +from src_cbo +where src_cbo.key in (select key from src_cbo s1 where s1.key > '9') +; + +-- agg, corr +-- add back once rank issue fixed for cbo + +-- distinct, corr +select * +from src_cbo b +where b.key in + (select distinct a.key + from src_cbo a + where b.value = a.value and a.key > '9' + ) +; + +-- non agg, corr, with join in Parent Query +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +; + +-- where and having +-- Plan is: +-- Stage 1: b semijoin sq1:src_cbo (subquery in where) +-- Stage 2: group by Stage 1 o/p +-- Stage 5: group by on sq2:src_cbo (subquery in having) +-- Stage 6: Stage 2 o/p semijoin Stage 5 +explain +select key, value, count(*) +from src_cbo b +where b.key in (select key from src_cbo where src_cbo.key > '8') +group by key, value +having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key ) +; + +-- non agg, non corr, windowing +explain +select p_mfgr, p_name, avg(p_size) +from part +group by p_mfgr, p_name +having p_name in + (select first_value(p_name) over(partition by p_mfgr order by p_size) from part) +; + +-- 18. SubQueries Not Exists +-- distinct, corr +select * +from src_cbo b +where not exists + (select distinct a.key + from src_cbo a + where b.value = a.value and a.value > 'val_2' + ) +; + +-- no agg, corr, having +select * +from src_cbo b +group by key, value +having not exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +; + +-- 19. 
SubQueries Exists +-- view test +create view cv1 as +select * +from src_cbo b +where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +; + +select * from cv1 +; + +-- sq in from +select * +from (select * + from src_cbo b + where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +; + +-- sq in from, having +select * +from (select b.key, count(*) + from src_cbo b + group by b.key + having exists + (select a.key + from src_cbo a + where a.key = b.key and a.value > 'val_9' + ) +) a +; diff --git ql/src/test/results/clientpositive/cbo_correctness.q.out ql/src/test/results/clientpositive/cbo_correctness.q.out index daeacd8..21fe213 100644 --- ql/src/test/results/clientpositive/cbo_correctness.q.out +++ ql/src/test/results/clientpositive/cbo_correctness.q.out @@ -60,6 +60,103 @@ POSTHOOK: query: load data local inpath '../../data/files/cbo_t3.txt' into table POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@t3 +PREHOOK: query: CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE lineitem +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE lineitem +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem +POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +POSTHOOK: type: LOAD +#### A 
masked pattern was here #### +POSTHOOK: Output: default@lineitem +PREHOOK: query: create table src_cbo as select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table src_cbo as select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_cbo PREHOOK: query: analyze table t1 partition (dt) compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -120,6 +217,54 @@ POSTHOOK: query: analyze table t3 compute statistics for columns key, value, c_i POSTHOOK: type: QUERY POSTHOOK: Input: default@t3 #### A masked pattern was here #### +PREHOOK: query: analyze table src_cbo compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +PREHOOK: Output: default@src_cbo +POSTHOOK: query: analyze table src_cbo compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +POSTHOOK: Output: default@src_cbo +PREHOOK: query: analyze table src_cbo compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: analyze table src_cbo compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +PREHOOK: query: analyze table part compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Output: default@part +POSTHOOK: query: analyze table part compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Output: default@part +PREHOOK: query: analyze table part compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: analyze table part compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +PREHOOK: query: analyze table lineitem compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +PREHOOK: Output: default@lineitem +POSTHOOK: query: analyze table lineitem compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +POSTHOOK: Output: default@lineitem +PREHOOK: query: analyze table lineitem compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: analyze table lineitem compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### PREHOOK: query: -- 1. Test Select + TS select * from t1 PREHOOK: type: QUERY @@ -17911,3 +18056,1122 @@ POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t2@dt=2014 #### A masked pattern was here #### 400 +Warning: Shuffle Join JOIN[24][tables = [$hdt$_519, $hdt$_524]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: -- 16. SubQueries Not In +-- non agg, non corr +select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: -- 16. 
SubQueries Not In +-- non agg, non corr +select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +Warning: Shuffle Join JOIN[26][tables = [$hdt$_530, $hdt$_536]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- non agg, corr +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: -- non agg, corr +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#5 almond antique blue firebrick mint 31 +Manufacturer#3 almond antique chartreuse khaki white 17 +Manufacturer#1 almond antique chartreuse lavender yellow 34 +Manufacturer#3 almond antique forest lavender goldenrod 14 +Manufacturer#4 almond antique gainsboro frosted violet 10 +Manufacturer#3 almond antique metallic orange dim 19 +Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#2 almond antique violet chocolate turquoise 14 +Manufacturer#4 almond antique violet mint lemon 39 +Manufacturer#2 almond antique violet turquoise frosted 40 +Manufacturer#1 almond aquamarine burnished black steel 28 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 +Manufacturer#4 almond aquamarine floral ivory bisque 27 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#2 almond aquamarine rose maroon antique 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#4 almond azure aquamarine papaya violet 12 +Manufacturer#5 almond azure blanched chiffon midnight 23 +Warning: Shuffle Join JOIN[39][tables 
= [$hdt$_543, $hdt$_547, $hdt$_554]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- agg, non corr +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: -- agg, non corr +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond aquamarine dodger light gainsboro 46 +almond antique olive coral navajo 45 +almond aquamarine pink moccasin thistle 42 +almond antique violet turquoise frosted 40 +almond antique violet mint lemon 39 +almond antique chartreuse lavender yellow 34 +almond antique blue firebrick mint 31 +almond aquamarine burnished black steel 28 +almond aquamarine floral ivory bisque 27 +almond aquamarine rose maroon antique 25 +almond azure blanched chiffon midnight 23 +almond antique metallic orange dim 19 +almond aquamarine sandy cyan gainsboro 18 +almond antique chartreuse khaki white 17 +almond antique violet chocolate turquoise 14 +almond antique forest lavender goldenrod 14 +almond azure aquamarine papaya violet 12 +almond antique gainsboro frosted violet 10 +almond aquamarine yellow dodger mint 7 +almond antique salmon chartreuse burlywood 6 +almond antique medium spring khaki 6 +almond antique burnished rose metallic 2 +almond aquamarine midnight light salmon 2 +almond antique sky peru orange 2 +almond antique burnished rose metallic 2 +almond antique misty red olive 1 +Warning: Shuffle Join JOIN[36][tables = [$hdt$_558, $hdt$_565]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- agg, corr +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size from part) a + where p_size < 10 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: -- agg, corr +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size from part) a + where p_size < 10 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique salmon chartreuse burlywood 6 +Manufacturer#5 almond antique medium spring khaki 6 +Manufacturer#4 almond antique gainsboro frosted violet 10 +Manufacturer#4 almond azure aquamarine papaya violet 12 +Manufacturer#2 almond antique violet chocolate turquoise 14 +Manufacturer#3 almond antique forest lavender goldenrod 14 +Manufacturer#3 almond antique chartreuse khaki white 17 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#3 almond antique metallic orange dim 19 +Manufacturer#5 almond azure blanched chiffon midnight 23 +Manufacturer#2 almond aquamarine rose maroon antique 25 +Manufacturer#4 almond aquamarine floral ivory bisque 27 +Manufacturer#1 almond aquamarine burnished black steel 28 +Manufacturer#5 almond antique blue firebrick mint 31 +Manufacturer#1 almond antique chartreuse lavender yellow 34 +Manufacturer#4 almond antique violet mint lemon 39 +Manufacturer#2 almond antique violet turquoise frosted 40 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 
+Warning: Shuffle Join JOIN[24][tables = [$hdt$_573, $hdt$_578]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- non agg, non corr, Group By in Parent Query +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: -- non agg, non corr, Group By in Parent Query +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +450 1 +7068 1 +21636 1 +22630 1 +59694 1 +61931 1 +85951 1 +88035 1 +88362 1 +106170 1 +119477 1 +119767 1 +123076 1 +139636 1 +175839 1 +182052 1 +Warning: Shuffle Join JOIN[41][tables = [$hdt$_586, $hdt$_593]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- add null check test from sq_notin.q once HIVE-7721 resolved. + +-- non agg, corr, having +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: -- add null check test from sq_notin.q once HIVE-7721 resolved. + +-- non agg, corr, having +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 1173.15 +Manufacturer#2 1690.68 +Warning: Shuffle Join JOIN[39][tables = [$hdt$_602, $hdt$_608]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- agg, non corr, having +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: -- agg, non corr, having +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 1173.15 +Manufacturer#2 1690.68 +PREHOOK: query: -- 17. SubQueries In +-- non agg, non corr +select * +from src_cbo +where src_cbo.key in (select key from src_cbo s1 where s1.key > '9') +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: -- 17. 
SubQueries In +-- non agg, non corr +select * +from src_cbo +where src_cbo.key in (select key from src_cbo s1 where s1.key > '9') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- agg, corr +-- add back once rank issue fixed for cbo + +-- distinct, corr +select * +from src_cbo b +where b.key in + (select distinct a.key + from src_cbo a + where b.value = a.value and a.key > '9' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: -- agg, corr +-- add back once rank issue fixed for cbo + +-- distinct, corr +select * +from src_cbo b +where b.key in + (select distinct a.key + from src_cbo a + where b.value = a.value and a.key > '9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- non agg, corr, with join in Parent Query +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: -- non agg, corr, with join in Parent Query +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +4297 1798 +108570 8571 +PREHOOK: query: -- where and having +-- Plan is: +-- Stage 1: b semijoin sq1:src_cbo (subquery in where) +-- Stage 2: group by Stage 1 o/p +-- Stage 5: group by on sq2:src_cbo (subquery in having) +-- Stage 6: Stage 2 o/p semijoin Stage 5 +explain +select key, value, count(*) +from src_cbo b +where b.key in (select key from src_cbo where src_cbo.key > '8') +group by key, value +having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- where and having +-- Plan is: +-- Stage 1: b semijoin sq1:src_cbo (subquery in where) +-- Stage 2: group by Stage 1 o/p +-- Stage 5: group by on sq2:src_cbo (subquery in having) +-- Stage 6: Stage 2 o/p semijoin Stage 5 +explain +select key, value, count(*) +from src_cbo b +where b.key in (select key from src_cbo where src_cbo.key > '8') +group by key, value +having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: src_cbo + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key > '8') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1202 Data size: 213956 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1202 Data size: 213956 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1202 Data size: 223572 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1202 Data size: 223572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: bigint) + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
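The plan above shows how the CBO path ends up handling the two IN subqueries of this query: each subquery is planned as its own branch and joined back to the outer query as a Left Semi Join (Stage-1 for the WHERE subquery against the base table, Stage-3 for the HAVING subquery against the grouped output of Stage-2). A hand-written HiveQL sketch of the equivalent join form follows; the aliases sq1, sq2, t and the column name cnt are illustrative only and do not correspond to the names Hive generates.

select t.key, t.value, t.cnt
from (select b.key, b.value, count(*) as cnt
      from src_cbo b
      left semi join (select key from src_cbo where key > '8') sq1
        on b.key = sq1.key
      group by b.key, b.value) t
left semi join (select count(*) as cnt
                from src_cbo s1
                where s1.key > '9'
                group by s1.key) sq2
  on t.cnt = sq2.cnt
;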
+PREHOOK: query: -- non agg, non corr, windowing +explain +select p_mfgr, p_name, avg(p_size) +from part +group by p_mfgr, p_name +having p_name in + (select first_value(p_name) over(partition by p_mfgr order by p_size) from part) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr, windowing +explain +select p_mfgr, p_name, avg(p_size) +from part +group by p_mfgr, p_name +having p_name in + (select first_value(p_name) over(partition by p_mfgr order by p_size) from part) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _wcol0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: _wcol0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: double) + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 114 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 18. SubQueries Not Exists +-- distinct, corr +select * +from src_cbo b +where not exists + (select distinct a.key + from src_cbo a + where b.value = a.value and a.value > 'val_2' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: -- 18. 
SubQueries Not Exists +-- distinct, corr +select * +from src_cbo b +where not exists + (select distinct a.key + from src_cbo a + where b.value = a.value and a.value > 'val_2' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +PREHOOK: query: -- no agg, corr, having +select * +from src_cbo b +group by key, value +having not exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: -- no agg, corr, having +select * +from src_cbo b +group by key, value +having not exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +119 val_119 +12 val_12 +PREHOOK: query: -- 19. SubQueries Exists +-- view test +create view cv1 as +select * +from src_cbo b +where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src_cbo +POSTHOOK: query: -- 19. 
SubQueries Exists +-- view test +create view cv1 as +select * +from src_cbo b +where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src_cbo +POSTHOOK: Output: default@cv1 +PREHOOK: query: select * from cv1 +PREHOOK: type: QUERY +PREHOOK: Input: default@cv1 +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: select * from cv1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cv1 +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- sq in from +select * +from (select * + from src_cbo b + where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: -- sq in from +select * +from (select * + from src_cbo b + where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- sq in from, having +select * +from (select b.key, count(*) + from src_cbo b + group by b.key + having exists + (select a.key + from src_cbo a + where a.key = b.key and a.value > 'val_9' + ) +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### +POSTHOOK: query: -- sq in from, having +select * +from (select b.key, count(*) + from src_cbo b + group by b.key + having exists + (select a.key + from src_cbo a + where a.key = b.key and a.value > 'val_9' + ) +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +90 3 +92 1 +95 2 +96 1 +97 2 +98 2
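For the NOT IN tests in section 16, the patch does not introduce a new rewrite; it reuses the existing SubQueryUtils/QBSubQuery machinery from the non-CBO path, including QBSubQuery.NotInCheck, which joins in an extra branch that checks whether the subquery produced any NULL key (if it did, NOT IN must return no rows). That extra branch carries no equi-join keys, which is consistent with the "is a cross product" warnings the golden output reports for these queries. Below is a hand-written HiveQL sketch of the rough shape of that rewrite for the first NOT IN test; the aliases sq and nic, the column c, and the exact join form are illustrative assumptions rather than the operator tree Hive actually builds, and the sketch ignores the case where the outer column itself is NULL (see the HIVE-7721 note in the test file).

-- original predicate: src_cbo.key not in (select key from src_cbo s1 where s1.key > '2')
select b.key, b.value
from src_cbo b
cross join (select count(*) as c
            from src_cbo s1
            where s1.key > '2' and s1.key is null) nic   -- null-check branch: no join keys, hence the cross product
left outer join (select key from src_cbo s1 where s1.key > '2') sq
  on b.key = sq.key
where sq.key is null   -- anti-join: keep outer rows with no match in the subquery
  and nic.c = 0        -- NOT IN yields no rows if the subquery produced any NULL key
;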