diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index d7c84cda7b..a9a9a386f6 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -735,6 +735,8 @@ minillaplocal.query.files=\ strict_managed_tables_sysdb.q,\ strict_managed_tables1.q,\ strict_managed_tables2.q,\ + subquery_ANY.q,\ + subquery_ALL.q,\ subquery_in_having.q,\ subquery_notin.q,\ subquery_nested_subquery.q, \ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index ecd70b2b0f..34b58d8246 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -35,7 +35,7 @@ import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlFunctionCategory; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.fun.*; import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.InferTypes; @@ -162,280 +162,325 @@ private SqlTypeName getAggTypeForScalarSub(RexSubQuery e) { return relAgg.getAggCallList().get(0).getType().getSqlTypeName(); } - protected RexNode apply(RelMetadataQuery mq, RexSubQuery e, Set variablesSet, - RelOptUtil.Logic logic, - HiveSubQRemoveRelBuilder builder, int inputCount, int offset, - boolean isCorrScalarAgg) { - switch (e.getKind()) { - case SCALAR_QUERY: - // if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check - // since it is guaranteed to produce at most one row - Double maxRowCount = mq.getMaxRowCount(e.rel); - boolean shouldIntroSQCountCheck = maxRowCount== null || maxRowCount > 1.0; - 
if(shouldIntroSQCountCheck) { - builder.push(e.rel); - // returns single row/column - builder.aggregate(builder.groupKey(), builder.count(false, "cnt")); + private RexNode rewriteScalar(RelMetadataQuery mq, RexSubQuery e, Set variablesSet, + HiveSubQRemoveRelBuilder builder, int offset, int inputCount, + boolean isCorrScalarAgg) { + // if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check + // since it is guaranteed to produce at most one row + Double maxRowCount = mq.getMaxRowCount(e.rel); + boolean shouldIntroSQCountCheck = maxRowCount== null || maxRowCount > 1.0; + if(shouldIntroSQCountCheck) { + builder.push(e.rel); + // returns single row/column + builder.aggregate(builder.groupKey(), builder.count(false, "cnt")); - SqlFunction countCheck = - new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, - InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, - SqlFunctionCategory.USER_DEFINED_FUNCTION); + SqlFunction countCheck = + new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, + InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, + SqlFunctionCategory.USER_DEFINED_FUNCTION); - //we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer - // ends up getting rid of Project since it is not used further up the tree - builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, - builder.call(countCheck, builder.field("cnt")), builder.literal(1))); - if (!variablesSet.isEmpty()) { - builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - } else { - builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); - } - offset++; - } - if(isCorrScalarAgg) { - // Transformation : - // Outer Query Left Join (inner query) on correlated predicate - // and preserve rows only from left side. 
- builder.push(e.rel); - final List parentQueryFields = new ArrayList<>(); - parentQueryFields.addAll(builder.fields()); - - // id is appended since there could be multiple scalar subqueries and FILTER - // is created using field name - String indicator = "alwaysTrue" + e.rel.getId(); - parentQueryFields.add(builder.alias(builder.literal(true), indicator)); - builder.project(parentQueryFields); + //we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer + // ends up getting rid of Project since it is not used further up the tree + builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, + builder.call(countCheck, builder.field("cnt")), builder.literal(1))); + if (!variablesSet.isEmpty()) { builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - - final ImmutableList.Builder operands = ImmutableList.builder(); - RexNode literal; - if(isAggZeroOnEmpty(e)) { - // since count has a return type of BIG INT we need to make a literal of type big int - // relbuilder's literal doesn't allow this - literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0)); - } else { - literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e)); - } - operands.add((builder.isNull(builder.field(indicator))), literal); - operands.add(field(builder, 1, builder.fields().size()-2)); - return builder.call(SqlStdOperatorTable.CASE, operands.build()); + } else { + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); } - - //Transformation is to left join for correlated predicates and inner join otherwise, - // but do a count on inner side before that to make sure it generates atmost 1 row. + offset++; + } + if(isCorrScalarAgg) { + // Transformation : + // Outer Query Left Join (inner query) on correlated predicate + // and preserve rows only from left side. 
builder.push(e.rel); + final List parentQueryFields = new ArrayList<>(); + parentQueryFields.addAll(builder.fields()); + + // id is appended since there could be multiple scalar subqueries and FILTER + // is created using field name + String indicator = "alwaysTrue" + e.rel.getId(); + parentQueryFields.add(builder.alias(builder.literal(true), indicator)); + builder.project(parentQueryFields); builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - return field(builder, inputCount, offset); - case IN: - case EXISTS: - // Most general case, where the left and right keys might have nulls, and - // caller requires 3-valued logic return. - // - // select e.deptno, e.deptno in (select deptno from emp) - // - // becomes - // - // select e.deptno, - // case - // when ct.c = 0 then false - // when dt.i is not null then true - // when e.deptno is null then null - // when ct.ck < ct.c then null - // else false - // end - // from e - // left join ( - // (select count(*) as c, count(deptno) as ck from emp) as ct - // cross join (select distinct deptno, true as i from emp)) as dt - // on e.deptno = dt.deptno - // - // If keys are not null we can remove "ct" and simplify to - // - // select e.deptno, - // case - // when dt.i is not null then true - // else false - // end - // from e - // left join (select distinct deptno, true as i from emp) as dt - // on e.deptno = dt.deptno - // - // We could further simplify to - // - // select e.deptno, - // dt.i is not null - // from e - // left join (select distinct deptno, true as i from emp) as dt - // on e.deptno = dt.deptno - // - // but have not yet. - // - // If the logic is TRUE we can just kill the record if the condition - // evaluates to FALSE or UNKNOWN. 
Thus the query simplifies to an inner - // join: - // - // select e.deptno, - // true - // from e - // inner join (select distinct deptno from emp) as dt - // on e.deptno = dt.deptno - // + final ImmutableList.Builder operands = ImmutableList.builder(); + RexNode literal; + if(isAggZeroOnEmpty(e)) { + // since count has a return type of BIG INT we need to make a literal of type big int + // relbuilder's literal doesn't allow this + literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0)); + } else { + literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e)); + } + operands.add((builder.isNull(builder.field(indicator))), literal); + operands.add(field(builder, 1, builder.fields().size()-2)); + return builder.call(SqlStdOperatorTable.CASE, operands.build()); + } - builder.push(e.rel); - final List fields = new ArrayList<>(); - if(e.getKind() == SqlKind.IN) { - fields.addAll(builder.fields()); - // Transformation: sq_count_check(count(*), true) FILTER is generated on top - // of subquery which is then joined (LEFT or INNER) with outer query - // This transformation is done to add run time check using sq_count_check to - // throw an error if subquery is producing zero row, since with aggregate this - // will produce wrong results (because we further rewrite such queries into JOIN) - if(isCorrScalarAgg) { - // returns single row/column - builder.aggregate(builder.groupKey(), - builder.count(false, "cnt_in")); - - if (!variablesSet.isEmpty()) { - builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - } else { - builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); - } + //Transformation is to left join for correlated predicates and inner join otherwise, + // but do a count on inner side before that to make sure it generates atmost 1 row. 
+ builder.push(e.rel); + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + return field(builder, inputCount, offset); + } - SqlFunction inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, - ReturnTypes.BIGINT, InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, - SqlFunctionCategory.USER_DEFINED_FUNCTION); + private RexNode rewriteSomeAll(RexSubQuery e, Set variablesSet, + HiveSubQRemoveRelBuilder builder) { + final SqlQuantifyOperator op = (SqlQuantifyOperator) e.op; + assert(op == SqlStdOperatorTable.SOME_GE + || op == SqlStdOperatorTable.SOME_LE + || op == SqlStdOperatorTable.SOME_LT + || op == SqlStdOperatorTable.SOME_GT); + builder.push(e.rel) + .aggregate(builder.groupKey(), + op.comparisonKind == SqlKind.GREATER_THAN + || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL + ? builder.min("m", builder.field(0)) + : builder.max("m", builder.field(0)), + builder.count(false, "c"), + builder.count(false, "d", builder.field(0))) + .as("q") + .join(JoinRelType.INNER); + return builder.call(SqlStdOperatorTable.CASE, + builder.call(SqlStdOperatorTable.EQUALS, + builder.field("q", "c"), builder.literal(0)), + builder.literal(false), + builder.call(SqlStdOperatorTable.IS_TRUE, + builder.call(RelOptUtil.op(op.comparisonKind, null), + e.operands.get(0), builder.field("q", "m"))), + builder.literal(true), + builder.call(SqlStdOperatorTable.GREATER_THAN, + builder.field("q", "c"), builder.field("q", "d")), + builder.literal(null), + builder.call(RelOptUtil.op(op.comparisonKind, null), + e.operands.get(0), builder.field("q", "m"))); - // we create FILTER (sq_count_check(count()) > 0) instead of PROJECT - // because RelFieldTrimmer ends up getting rid of Project - // since it is not used further up the tree - builder.filter(builder.call(SqlStdOperatorTable.GREATER_THAN, - //true here indicates that sq_count_check is for IN/NOT IN subqueries - builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)), - 
builder.literal(0))); - offset = offset + 1; - builder.push(e.rel); - } - } + } - // First, the cross join - switch (logic) { - case TRUE_FALSE_UNKNOWN: - case UNKNOWN_AS_TRUE: - // Since EXISTS/NOT EXISTS are not affected by presence of - // null keys we do not need to generate count(*), count(c) - if (e.getKind() == SqlKind.EXISTS) { - logic = RelOptUtil.Logic.TRUE_FALSE; - break; - } + private RexNode rewriteInExists(RexSubQuery e, Set variablesSet, + RelOptUtil.Logic logic, HiveSubQRemoveRelBuilder builder, int offset, + boolean isCorrScalarAgg) { + // Most general case, where the left and right keys might have nulls, and + // caller requires 3-valued logic return. + // + // select e.deptno, e.deptno in (select deptno from emp) + // + // becomes + // + // select e.deptno, + // case + // when ct.c = 0 then false + // when dt.i is not null then true + // when e.deptno is null then null + // when ct.ck < ct.c then null + // else false + // end + // from e + // left join ( + // (select count(*) as c, count(deptno) as ck from emp) as ct + // cross join (select distinct deptno, true as i from emp)) as dt + // on e.deptno = dt.deptno + // + // If keys are not null we can remove "ct" and simplify to + // + // select e.deptno, + // case + // when dt.i is not null then true + // else false + // end + // from e + // left join (select distinct deptno, true as i from emp) as dt + // on e.deptno = dt.deptno + // + // We could further simplify to + // + // select e.deptno, + // dt.i is not null + // from e + // left join (select distinct deptno, true as i from emp) as dt + // on e.deptno = dt.deptno + // + // but have not yet. + // + // If the logic is TRUE we can just kill the record if the condition + // evaluates to FALSE or UNKNOWN. 
Thus the query simplifies to an inner + // join: + // + // select e.deptno, + // true + // from e + // inner join (select distinct deptno from emp) as dt + // on e.deptno = dt.deptno + // + + builder.push(e.rel); + final List fields = new ArrayList<>(); + if(e.getKind() == SqlKind.IN) { + fields.addAll(builder.fields()); + // Transformation: sq_count_check(count(*), true) FILTER is generated on top + // of subquery which is then joined (LEFT or INNER) with outer query + // This transformation is done to add run time check using sq_count_check to + // throw an error if subquery is producing zero row, since with aggregate this + // will produce wrong results (because we further rewrite such queries into JOIN) + if(isCorrScalarAgg) { + // returns single row/column builder.aggregate(builder.groupKey(), - builder.count(false, "c"), - builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", - builder.fields())); - builder.as("ct"); - if(!variablesSet.isEmpty()) { - //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); + builder.count(false, "cnt_in")); + + if (!variablesSet.isEmpty()) { builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); } else { builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); } - offset += 2; + SqlFunction inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, + ReturnTypes.BIGINT, InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, + SqlFunctionCategory.USER_DEFINED_FUNCTION); + + // we create FILTER (sq_count_check(count()) > 0) instead of PROJECT + // because RelFieldTrimmer ends up getting rid of Project + // since it is not used further up the tree + builder.filter(builder.call(SqlStdOperatorTable.GREATER_THAN, + //true here indicates that sq_count_check is for IN/NOT IN subqueries + builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)), + builder.literal(0))); + offset = offset + 1; builder.push(e.rel); - break; } + } - // Now the left join - 
switch (logic) { - case TRUE: - if (fields.isEmpty()) { - builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId())); - if(!variablesSet.isEmpty() - && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { - // avoid adding group by for correlated IN/EXISTS queries - // since this is rewritting into semijoin - break; - } else { - builder.aggregate(builder.groupKey(0)); - } - } else { - if(!variablesSet.isEmpty() - && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { - // avoid adding group by for correlated IN/EXISTS queries - // since this is rewritting into semijoin - break; - } else { - builder.aggregate(builder.groupKey(fields)); - } - } + // First, the cross join + switch (logic) { + case TRUE_FALSE_UNKNOWN: + case UNKNOWN_AS_TRUE: + // Since EXISTS/NOT EXISTS are not affected by presence of + // null keys we do not need to generate count(*), count(c) + if (e.getKind() == SqlKind.EXISTS) { + logic = RelOptUtil.Logic.TRUE_FALSE; break; - default: - fields.add(builder.alias(builder.literal(true), "i" + e.rel.getId())); - builder.project(fields); - builder.distinct(); - } - builder.as("dt"); - final List conditions = new ArrayList<>(); - for (Pair pair - : Pair.zip(e.getOperands(), builder.fields())) { - conditions.add( - builder.equals(pair.left, RexUtil.shift(pair.right, offset))); } - switch (logic) { - case TRUE: - builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true); - return builder.literal(true); + builder.aggregate(builder.groupKey(), + builder.count(false, "c"), + builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", + builder.fields())); + builder.as("ct"); + if(!variablesSet.isEmpty()) { + //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + } else { + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); } - builder.join(JoinRelType.LEFT, builder.and(conditions), 
variablesSet); - final List keyIsNulls = new ArrayList<>(); - for (RexNode operand : e.getOperands()) { - if (operand.getType().isNullable()) { - keyIsNulls.add(builder.isNull(operand)); + offset += 2; + builder.push(e.rel); + break; + } + + // Now the left join + switch (logic) { + case TRUE: + if (fields.isEmpty()) { + builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId())); + if(!variablesSet.isEmpty() + && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { + // avoid adding group by for correlated IN/EXISTS queries + // since this is rewritting into semijoin + break; + } else { + builder.aggregate(builder.groupKey(0)); + } + } else { + if(!variablesSet.isEmpty() + && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { + // avoid adding group by for correlated IN/EXISTS queries + // since this is rewritting into semijoin + break; + } else { + builder.aggregate(builder.groupKey(fields)); } } - final ImmutableList.Builder operands = ImmutableList.builder(); - switch (logic) { - case TRUE_FALSE_UNKNOWN: - case UNKNOWN_AS_TRUE: - operands.add( - builder.equals(builder.field("ct", "c"), builder.literal(0)), - builder.literal(false)); - //now that we are using LEFT OUTER JOIN to join inner count, count(*) - // with outer table, we wouldn't be able to tell if count is zero - // for inner table since inner join with correlated values will get rid - // of all values where join cond is not true (i.e where actual inner table - // will produce zero result). 
To handle this case we need to check both - // count is zero or count is null - operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false)); - break; - } - operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())), - builder.literal(true)); - if (!keyIsNulls.isEmpty()) { - //Calcite creates null literal with Null type here but - // because HIVE doesn't support null type it is appropriately typed boolean - operands.add(builder.or(keyIsNulls), - e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN)); - // we are creating filter here so should not be returning NULL. - // Not sure why Calcite return NULL - } - RexNode b = builder.literal(true); - switch (logic) { - case TRUE_FALSE_UNKNOWN: - b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN); - // fall through - case UNKNOWN_AS_TRUE: - operands.add( - builder.call(SqlStdOperatorTable.LESS_THAN, - builder.field("ct", "ck"), builder.field("ct", "c")), - b); - break; + break; + default: + fields.add(builder.alias(builder.literal(true), "i" + e.rel.getId())); + builder.project(fields); + builder.distinct(); + } + builder.as("dt"); + final List conditions = new ArrayList<>(); + for (Pair pair + : Pair.zip(e.getOperands(), builder.fields())) { + conditions.add( + builder.equals(pair.left, RexUtil.shift(pair.right, offset))); + } + switch (logic) { + case TRUE: + builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true); + return builder.literal(true); + } + builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet); + + final List keyIsNulls = new ArrayList<>(); + for (RexNode operand : e.getOperands()) { + if (operand.getType().isNullable()) { + keyIsNulls.add(builder.isNull(operand)); } - operands.add(builder.literal(false)); - return builder.call(SqlStdOperatorTable.CASE, operands.build()); + } + final ImmutableList.Builder operands = ImmutableList.builder(); + switch (logic) { + case TRUE_FALSE_UNKNOWN: + case 
UNKNOWN_AS_TRUE: + operands.add( + builder.equals(builder.field("ct", "c"), builder.literal(0)), + builder.literal(false)); + //now that we are using LEFT OUTER JOIN to join inner count, count(*) + // with outer table, we wouldn't be able to tell if count is zero + // for inner table since inner join with correlated values will get rid + // of all values where join cond is not true (i.e where actual inner table + // will produce zero result). To handle this case we need to check both + // count is zero or count is null + operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false)); + break; + } + operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())), + builder.literal(true)); + if (!keyIsNulls.isEmpty()) { + //Calcite creates null literal with Null type here but + // because HIVE doesn't support null type it is appropriately typed boolean + operands.add(builder.or(keyIsNulls), + e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN)); + // we are creating filter here so should not be returning NULL. 
+ // Not sure why Calcite return NULL + } + RexNode b = builder.literal(true); + switch (logic) { + case TRUE_FALSE_UNKNOWN: + b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN); + // fall through + case UNKNOWN_AS_TRUE: + operands.add( + builder.call(SqlStdOperatorTable.LESS_THAN, + builder.field("ct", "ck"), builder.field("ct", "c")), + b); + break; + } + operands.add(builder.literal(false)); + return builder.call(SqlStdOperatorTable.CASE, operands.build()); + } + protected RexNode apply(RelMetadataQuery mq, RexSubQuery e, Set variablesSet, + RelOptUtil.Logic logic, + HiveSubQRemoveRelBuilder builder, int inputCount, int offset, + boolean isCorrScalarAgg) { + switch (e.getKind()) { + case SCALAR_QUERY: + return rewriteScalar(mq,e,variablesSet,builder,offset,inputCount,isCorrScalarAgg); + case SOME: + return rewriteSomeAll(e,variablesSet,builder); + case IN: + case EXISTS: + return rewriteInExists(e,variablesSet,logic,builder,offset,isCorrScalarAgg); default: throw new AssertionError(e.getKind()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 1134cf3bd1..24edbf6187 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -40,8 +40,7 @@ import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; -import org.apache.calcite.sql.fun.SqlCastFunction; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.fun.*; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; @@ -67,9 +66,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter.HiveNlsString.Interpretation; -import org.apache.hadoop.hive.ql.parse.ParseUtils; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.*; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -191,6 +188,74 @@ public RexNode convert(ExprNodeDesc expr) throws SemanticException { // TODO: handle ExprNodeColumnListDesc } + private RexNode getSomeSubquery(final RelNode subqueryRel, final RexNode lhs, + final SqlQuantifyOperator quantifyOperator) { + if(quantifyOperator == SqlStdOperatorTable.SOME_EQ) { + return RexSubQuery.in(subqueryRel, ImmutableList.of(lhs) ); + } else if (quantifyOperator == SqlStdOperatorTable.SOME_NE) { + RexSubQuery subQuery = RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); + return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, subQuery); + } else { + return RexSubQuery.some(subqueryRel, ImmutableList.of(lhs), quantifyOperator); + } + } + + private void throwInvalidSubqueryError(final ASTNode comparisonOp) throws SemanticException { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "Invalid operator:" + comparisonOp.toString())); + } + + // <>ANY and =ALL is not supported + private RexNode convertSubquerySomeAll(final ExprNodeSubQueryDesc subQueryDesc) + throws SemanticException { + assert(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SOME + || subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL); + + RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs()); + ASTNode comparisonOp = 
subQueryDesc.getComparisonOp(); + SqlQuantifyOperator quantifyOperator = null; + + switch (comparisonOp.getType()) { + case HiveParser.EQUAL: + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL) { + throwInvalidSubqueryError(comparisonOp); + } + quantifyOperator = SqlStdOperatorTable.SOME_EQ; + break; + case HiveParser.LESSTHAN: + quantifyOperator = SqlStdOperatorTable.SOME_LT; + break; + case HiveParser.LESSTHANOREQUALTO: + quantifyOperator = SqlStdOperatorTable.SOME_LE; + break; + case HiveParser.GREATERTHAN: + quantifyOperator = SqlStdOperatorTable.SOME_GT; + break; + case HiveParser.GREATERTHANOREQUALTO: + quantifyOperator = SqlStdOperatorTable.SOME_GE; + break; + case HiveParser.NOTEQUAL: + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SOME) { + throwInvalidSubqueryError(comparisonOp); + } + quantifyOperator = SqlStdOperatorTable.SOME_NE; + break; + default: + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "Invalid operator:" + comparisonOp.toString())); + } + + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL) { + quantifyOperator = SqlStdOperatorTable.some(quantifyOperator.comparisonKind.negateNullSafe()); + } + RexNode someQuery = getSomeSubquery(subQueryDesc.getRexSubQuery(), rexNodeLhs, + quantifyOperator); + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL) { + return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, someQuery); + } + return someQuery; + } + private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws SemanticException { if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.IN ) { /* @@ -221,8 +286,10 @@ else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SCALAR){ //create RexSubQuery node RexNode rexSubQuery = RexSubQuery.scalar(subQueryDesc.getRexSubQuery()); return rexSubQuery; + } else if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SOME + || 
subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL) { + return convertSubquerySomeAll(subQueryDesc); } - else { throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( "Invalid subquery: " + subQueryDesc.getType())); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 20501cf75e..11f02d0ea6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -3178,48 +3178,6 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, return filterRel; } - private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel, - boolean forHavingClause, Set corrScalarQueries) - throws SemanticException { - List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); - - ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); - List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); - for(int i=0; i aliasToRel) throws SemanticException { @@ -3378,8 +3336,6 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean Set corrScalarQueriesWithAgg = new HashSet(); boolean isSubQuery = false; try { - //disallow subqueries which HIVE doesn't currently support - subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg); Deque stack = new ArrayDeque(); stack.push(node); @@ -3388,13 +3344,11 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean switch (next.getType()) { case HiveParser.TOK_SUBQUERY_EXPR: - /* - * Restriction 2.h Subquery isnot allowed in LHS - */ - if (next.getChildren().size() == 3 && next.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR) { - throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION - .getMsg(next.getChild(2), "SubQuery in LHS expressions are not 
supported.")); - } + + //disallow subqueries which HIVE doesn't currently support + SubQueryUtils.subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, + corrScalarQueriesWithAgg, ctx, this.relToHiveRR); + String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates(); QB qbSQ = new QB(qb.getId(), sbQueryAlias, true); qbSQ.setInsideView(qb.isInsideView()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 608befcdf9..3ca45fe1fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -45,6 +45,7 @@ KW_TRUE : 'TRUE'; KW_FALSE : 'FALSE'; KW_UNKNOWN : 'UNKNOWN'; KW_ALL : 'ALL'; +KW_SOME : 'SOME'; KW_NONE: 'NONE'; KW_AND : 'AND'; KW_OR : 'OR'; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index f22511ad67..58fe0cd32e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -589,6 +589,23 @@ precedenceSimilarExpressionAtom[CommonTree t] | KW_LIKE KW_ALL (expr=expressionsInParenthesis[false, false]) -> ^(TOK_FUNCTION Identifier["likeall"] {$t} {$expr.tree}) + | + precedenceSimilarExpressionQuantifierPredicate[$t] + ; + +precedenceSimilarExpressionQuantifierPredicate[CommonTree t] + : + dropPartitionOperator quantifierType subQueryExpression + -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP quantifierType dropPartitionOperator ) subQueryExpression {$t}) + ; + +quantifierType + : + KW_ANY -> KW_SOME + | + KW_SOME -> KW_SOME + | + KW_ALL -> KW_ALL ; precedenceSimilarExpressionIn[CommonTree t] diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index bf1af29e0c..51448a4c79 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -45,6 +45,8 @@ NOT_EXISTS, IN, NOT_IN, + SOME, + ALL, SCALAR; public static SubQueryType get(ASTNode opNode) throws SemanticException { @@ -71,6 +73,10 @@ public static SubQueryType get(ASTNode opNode) throws SemanticException { return IN; case HiveParser.TOK_SUBQUERY_OP_NOTIN: return NOT_IN; + case HiveParser.KW_SOME: + return SOME; + case HiveParser.KW_ALL: + return ALL; default: throw new SemanticException(SemanticAnalyzer.generateErrorMessage(opNode, "Operator not supported in SubQuery use.")); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java index 099157f2c7..4fbf2c698f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java @@ -18,19 +18,17 @@ package org.apache.hadoop.hive.ql.parse; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Deque; -import java.util.List; -import java.util.Map; +import java.util.*; import org.antlr.runtime.CommonToken; import org.antlr.runtime.tree.CommonTreeAdaptor; +import org.apache.calcite.rel.*; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.metadata.*; import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType; import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryTypeDef; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount; @@ -101,6 +99,67 @@ static ASTNode isNull(ASTNode expr) { return node; } + static public void subqueryRestrictionCheck(QB qb, ASTNode subqueryExprNode, RelNode srcRel, + boolean forHavingClause, Set corrScalarQueries, Context ctx, + LinkedHashMap relToHiveRR) + throws SemanticException { + + 
assert(subqueryExprNode.getType() == HiveParser.TOK_SUBQUERY_EXPR); + + /* + * Restriction : Subquery is not allowed in LHS + */ + if (subqueryExprNode.getChildren().size() == 3 + && subqueryExprNode.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR) { + throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION + .getMsg(subqueryExprNode.getChild(2), "SubQuery on left hand side is not supported.")); + } + + // avoid subquery restrictions for SOME/ALL for now. NOTE(review): the grammar rewrite and TypeCheckProcFactory place the quantifier (KW_SOME/KW_ALL) at child 0 of TOK_SUBQUERY_OP and the comparison operator at child 1, so the getChild(1) test below appears never to match - confirm the intended index + if(subqueryExprNode.getChild(0).getChildCount() > 1 + && (subqueryExprNode.getChild(0).getChild(1).getType() == HiveParser.KW_SOME + || subqueryExprNode.getChild(0).getChild(1).getType() == HiveParser.KW_ALL)) { + return; + } + + // TOK_SUBQUERY_EXPR + // 0. TOK_SUBQUERY_OP + // 0. TYPE: IN/SOME/ALL/EXISTS + // 1. Comparison op: >, <, etc. + // 1. TOK_QUERY: Subquery + // 2. LHS expr + ASTNode clonedSubExprAST = (ASTNode) SubQueryUtils.adaptor.dupTree(subqueryExprNode); + // We do not care about the transformation or rewriting of the AST + // that the following statements do; + // we only care about the restriction checks they perform. 
+ // We plan to get rid of these restrictions later + int sqIdx = qb.incrNumSubQueryPredicates(); + ASTNode outerQueryExpr = (ASTNode) clonedSubExprAST.getChild(2); + + if (outerQueryExpr != null && outerQueryExpr.getType() == HiveParser.TOK_SUBQUERY_EXPR) { + throw new CalciteSubquerySemanticException( + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + outerQueryExpr, "IN/EXISTS/SOME/ALL subqueries are not allowed in LHS")); + } + + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, clonedSubExprAST, + subqueryExprNode, ctx); + + RowResolver inputRR = relToHiveRR.get(srcRel); + + String havingInputAlias = null; + + boolean [] subqueryConfig = {false, false}; + subQuery.subqueryRestrictionsCheck(inputRR, forHavingClause, + havingInputAlias, subqueryConfig); + + if(subqueryConfig[0]) { + corrScalarQueries.add(subqueryExprNode); + } + //} + } + + /* * Check that SubQuery is a top level conjuncts. @@ -236,15 +295,6 @@ static QBSubQuery buildSubQuery(String outerQueryId, ASTNode sq = (ASTNode) sqAST.getChild(1); ASTNode outerQueryExpr = (ASTNode) sqAST.getChild(2); - /* - * Restriction.8.m :: We allow only 1 SubQuery expression per Query. 
- */ - if (outerQueryExpr != null && outerQueryExpr.getType() == HiveParser.TOK_SUBQUERY_EXPR ) { - - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - originalSQAST.getChild(1), "Only 1 SubQuery expression is supported.")); - } - return new QBSubQuery(outerQueryId, sqIdx, sq, outerQueryExpr, buildSQOperator(sqOp), originalSQAST, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index dff108aa6e..7e804e3c2d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -1754,6 +1754,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTIN); boolean isEXISTS = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_EXISTS || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTEXISTS); + boolean isSOME = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_SOME); + boolean isALL = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_ALL); boolean isScalar = subqueryOp.getChildCount() == 0 ; // subqueryToRelNode might be null if subquery expression anywhere other than @@ -1791,6 +1793,16 @@ else if(isScalar){ TypeInfo subExprType = TypeConverter.convert(subqueryRel.getRowType().getFieldList().get(0).getType()); return new ExprNodeSubQueryDesc(subExprType, subqueryRel, ExprNodeSubQueryDesc.SubqueryType.SCALAR); + } else if(isSOME) { + assert(nodeOutputs[2] != null); + ExprNodeDesc lhs = (ExprNodeDesc)nodeOutputs[2]; + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryRel, + ExprNodeSubQueryDesc.SubqueryType.SOME, lhs, (ASTNode)subqueryOp.getChild(1) ); + } else if(isALL) { + assert(nodeOutputs[2] != null); + 
ExprNodeDesc lhs = (ExprNodeDesc)nodeOutputs[2]; + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryRel, + ExprNodeSubQueryDesc.SubqueryType.ALL, lhs, (ASTNode)subqueryOp.getChild(1)); } /* diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java index dbb1cd7524..3286968b05 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java @@ -20,6 +20,8 @@ import java.io.Serializable; +import org.apache.hadoop.hive.ql.metadata.*; +import org.apache.hadoop.hive.ql.parse.*; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.calcite.rel.RelNode; @@ -35,7 +37,9 @@ public static enum SubqueryType{ IN, EXISTS, - SCALAR + SCALAR, + SOME, + ALL }; /** @@ -44,12 +48,14 @@ private RelNode rexSubQuery; private ExprNodeDesc subQueryLhs; private SubqueryType type; + private ASTNode comparisonOp; public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, SubqueryType type) { super(typeInfo); this.rexSubQuery = subQuery; this.subQueryLhs = null; this.type = type; + this.comparisonOp = null; } public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, SubqueryType type, ExprNodeDesc lhs) { @@ -57,9 +63,19 @@ public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, this.rexSubQuery = subQuery; this.subQueryLhs = lhs; this.type = type; - + this.comparisonOp = null; + } + public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, + SubqueryType type, ExprNodeDesc lhs, ASTNode comparisonOp) { + super(typeInfo); + this.rexSubQuery = subQuery; + this.subQueryLhs = lhs; + this.type = type; + this.comparisonOp = comparisonOp; } + public ASTNode getComparisonOp() {return this.comparisonOp; } + public SubqueryType getType() { return type; } diff --git a/ql/src/test/queries/clientnegative/subquery_all_equal.q 
b/ql/src/test/queries/clientnegative/subquery_all_equal.q new file mode 100644 index 0000000000..7e3a360287 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_all_equal.q @@ -0,0 +1,3 @@ +--! qt:dataset:part +-- =ALL is not allowed +explain select * from part where p_type = ALL(select max(p_type) from part); \ No newline at end of file diff --git a/ql/src/test/queries/clientnegative/subquery_any_notequal.q b/ql/src/test/queries/clientnegative/subquery_any_notequal.q new file mode 100644 index 0000000000..26d96ace49 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_any_notequal.q @@ -0,0 +1,3 @@ +--! qt:dataset:part +-- <>ANY is not allowed +explain select * from part where p_type <> ANY(select max(p_type) from part); \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/subquery_ALL.q b/ql/src/test/queries/clientpositive/subquery_ALL.q new file mode 100644 index 0000000000..160727932f --- /dev/null +++ b/ql/src/test/queries/clientpositive/subquery_ALL.q @@ -0,0 +1,63 @@ +--! 
qt:dataset:part + +--empty table +create table tempty(i int, j int); + +CREATE TABLE part_null_n0 as select * from part; +insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL); + +-- test all six comparison operators +--explain cbo select count(*) from part where p_partkey = ALL (select p_partkey from part); +--select count(*) from part where p_partkey = ALL (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey <> ALL (select p_partkey from part); +select count(*) from part where p_partkey <> ALL (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey > ALL (select p_partkey from part); +select count(*) from part where p_partkey > ALL (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey < ALL (select p_partkey from part); +select count(*) from part where p_partkey < ALL (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey >= ALL (select p_partkey from part); +select count(*) from part where p_partkey >= ALL (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey <= ALL (select p_partkey from part); +select count(*) from part where p_partkey <= ALL (select p_partkey from part); + +-- ALL with aggregate in subquery +explain cbo select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey); +select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey); + +select count(*) from part where p_size < ALL (select max(null) from part group by p_partkey); + +--empty row produces true with ALL +select count(*) from part where p_partkey <> ALL(select i from tempty); + +-- true + null, should produce zero results +select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0 UNION select null from part group by true); + +-- false + null -> false, therefore should produce results +select count(*) from 
part where ((p_partkey <> ALL (select p_partkey from part_null_n0)) == false); + +-- all null -> null +select count(*) from part where (p_partkey <> ALL (select p_partkey from part_null_n0 where p_partkey is null)) is null; + +-- false, should produce zero result +select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0); + +-- ALL in having +explain cbo select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey); +select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey); + +-- multiple +explain cbo select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_size <> ALL (select p_size from part group by p_size); +select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_partkey <> ALL (select p_size from part group by p_size); + +--nested +explain cbo select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) ; +select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) ; diff --git a/ql/src/test/queries/clientpositive/subquery_ANY.q b/ql/src/test/queries/clientpositive/subquery_ANY.q new file mode 100644 index 0000000000..1dfbccd612 --- /dev/null +++ b/ql/src/test/queries/clientpositive/subquery_ANY.q @@ -0,0 +1,67 @@ +--! 
qt:dataset:part + +--empty table +create table tempty(i int, j int); + +CREATE TABLE part_null_n0 as select * from part; +insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL); + +-- test all six comparison operators +explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part); +select count(*) from part where p_partkey = ANY (select p_partkey from part); + +--explain cbo select count(*) from part where p_partkey <> ANY (select p_partkey from part); +--select count(*) from part where p_partkey <> ANY (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey > ANY (select p_partkey from part); +select count(*) from part where p_partkey > ANY (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey < ANY (select p_partkey from part); +select count(*) from part where p_partkey < ANY (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part); +select count(*) from part where p_partkey >= ANY (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey <= ANY (select p_partkey from part); +select count(*) from part where p_partkey <= ANY (select p_partkey from part); + +-- SOME is same as ANY +explain cbo select count(*) from part where p_partkey = SOME(select min(p_partkey) from part); +select count(*) from part where p_partkey = SOME(select min(p_partkey) from part); + +-- ANY with aggregate in subquery +explain cbo select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey); +select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey); + +select count(*) from part where p_size < ANY (select max(null) from part group by p_partkey); + +--empty row produces false with ANY +select count(*) from part where p_partkey = ANY(select i from tempty); + +-- true + null, should produce results +select count(*) 
from part where p_partkey = ANY (select p_partkey from part_null_n0); + +-- false + null -> null +select count(*) from part where (p_size= ANY (select p_partkey from part_null_n0)) is null; + +-- all null -> null +select count(*) from part where (p_partkey = ANY (select p_partkey from part_null_n0 where p_partkey is null)) is null; + +-- false, should produce zero result +select count(*) from part where p_partkey > ANY (select max(p_partkey) from part_null_n0); + +-- ANY in having +explain cbo select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey); +select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey); + +-- multiple +explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part) + AND p_size = ANY (select p_size from part group by p_size); +select count(*) from part where p_partkey >= ANY (select p_partkey from part) + AND p_size = ANY (select p_size from part group by p_size); + +--nested +explain cbo select count(*) from part where p_partkey + >= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by p_size)) ; +select count(*) from part where p_partkey + >= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by p_size)) ; diff --git a/ql/src/test/results/clientnegative/subquery_all_equal.q.out b/ql/src/test/results/clientnegative/subquery_all_equal.q.out new file mode 100644 index 0000000000..47a0ad379f --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_all_equal.q.out @@ -0,0 +1 @@ +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Invalid SubQuery expression Invalid operator:= diff --git a/ql/src/test/results/clientnegative/subquery_any_notequal.q.out b/ql/src/test/results/clientnegative/subquery_any_notequal.q.out new file mode 100644 index 0000000000..500a8c298e --- /dev/null +++ 
b/ql/src/test/results/clientnegative/subquery_any_notequal.q.out @@ -0,0 +1 @@ +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Invalid SubQuery expression Invalid operator:<> diff --git a/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out new file mode 100644 index 0000000000..0282ab4422 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out @@ -0,0 +1,416 @@ +PREHOOK: query: create table tempty(i int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: CREATE TABLE part_null_n0 as select * from part +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@part +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null_n0 +POSTHOOK: query: CREATE TABLE part_null_n0 as select * from part +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@part +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null_n0 +POSTHOOK: Lineage: part_null_n0.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_comment SIMPLE [(part)part.FieldSchema(name:p_comment, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_container SIMPLE [(part)part.FieldSchema(name:p_container, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_partkey SIMPLE [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_retailprice SIMPLE 
[(part)part.FieldSchema(name:p_retailprice, type:double, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_type SIMPLE [(part)part.FieldSchema(name:p_type, type:string, comment:null), ] +PREHOOK: query: insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@part_null_n0 +POSTHOOK: query: insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@part_null_n0 +POSTHOOK: Lineage: part_null_n0.p_brand EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_comment EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_container EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_mfgr EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_name EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_partkey EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_retailprice EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_size EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_type EXPRESSION [] +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey <> ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey <> ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[AND(OR(IS NULL($4), =($1, 0)), OR(IS NOT NULL($0), =($1, 0), IS NOT NULL($4)), OR(>=($2, $1), =($1, 0), IS NOT NULL($4), IS NULL($0)))]) + HiveProject(p_partkey=[$0], c=[$3], ck=[$4], 
p_partkey0=[$1], i112=[$2]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $1)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(p_partkey=[$0], i112=[true]) + HiveAggregate(group=[{0}]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(c=[$0], ck=[$1]) + HiveAggregate(group=[{}], c=[COUNT()], ck=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where p_partkey <> ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey <> ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey > ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey > ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<=($0, $1)), $2), OR($3, $2, <=($0, $1)), OR(>($0, $1), $2, <=($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, 
$2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey > ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey > ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey < ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey < ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(>=($0, $1)), $2), OR($3, $2, >=($0, $1)), OR(<($0, $1), $2, >=($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey < ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey < ALL 
(select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey >= ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey >= ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<($0, $1)), $2), OR($3, $2, <($0, $1)), OR(>=($0, $1), $2, <($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey >= ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey >= ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +1 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey <= ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey 
<= ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(>($0, $1)), $2), OR($3, $2, >($0, $1)), OR(<=($0, $1), $2, >($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey <= ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey <= ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +1 +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(>=($0, $1)), $2), OR($3, $2, >=($0, $1)), OR(<($0, $1), $2, >=($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_size=[$5]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + 
HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)]) + HiveProject(p_partkey=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[max($5)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_size < ALL (select max(null) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_size < ALL (select max(null) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where p_partkey <> ALL(select i from tempty) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@tempty +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey <> ALL(select i from tempty) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@tempty +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part 
where p_partkey > ALL (select max(p_partkey) from part_null_n0 UNION select null from part group by true) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0 UNION select null from part group by true) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where ((p_partkey <> ALL (select p_partkey from part_null_n0)) == false) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where ((p_partkey <> ALL (select p_partkey from part_null_n0)) == false) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select count(*) from part where (p_partkey <> ALL (select p_partkey from part_null_n0 where p_partkey is null)) is null +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where (p_partkey <> ALL (select p_partkey from part_null_n0 where p_partkey is null)) is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: 
query: select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain cbo select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveProject($f0=[$4]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<=($4, $0)), $1), OR($2, $1, <=($4, $0)), OR(>($4, $0), $1, <=($4, $0), $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($1)], c=[COUNT()], d=[COUNT($1)]) + HiveProject(p_partkey=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part having count(*) > 
ALL (select count(*) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_size <> ALL (select p_size from part group by p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_size <> ALL (select p_size from part group by p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[AND(OR(IS NULL($8), =($5, 0)), OR(IS NOT NULL($1), =($5, 0), IS NOT NULL($8)), OR(>=($6, $5), =($5, 0), IS NOT NULL($8), IS NULL($1)))]) + HiveProject(p_partkey=[$0], p_size=[$1], m=[$4], c=[$5], d=[$6], c0=[$7], ck=[$8], p_size0=[$2], i9045=[$3]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<($0, $4)), =($5, 0)), OR(<=($5, $6), =($5, 0), <($0, $4)), OR(>=($0, $4), =($5, 0), <($0, $4), >($5, $6)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0], p_size=[$5]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(p_size=[$0], i9045=[true]) + HiveAggregate(group=[{5}]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], c=[$1], d=[$2], c0=[$3], ck=[$4]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(m=[$0], c=[$1], d=[$2]) + HiveAggregate(group=[{}], 
m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(c=[$0], ck=[$1]) + HiveAggregate(group=[{}], c=[COUNT()], ck=[COUNT($0)]) + HiveProject(p_size=[$0]) + HiveAggregate(group=[{5}]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_partkey <> ALL (select p_size from part group by p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_partkey <> ALL (select p_size from part group by p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +1 +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<($0, $1)), $2), OR($3, $2, <($0, $1)), 
OR(>=($0, $1), $2, <($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<($1, $2)), $3), OR($4, $3, <($1, $2)), OR(>=($1, $2), $3, <($1, $2), $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0], p_size=[$5]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveProject(p_size=[$0]) + HiveAggregate(group=[{5}]) + HiveTableScan(table=[[default, part_null_n0]], table:alias=[part_null_n0]) + +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26 diff --git a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out new file mode 100644 index 0000000000..a7cc5c3336 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out @@ -0,0 +1,425 @@ +PREHOOK: query: create table tempty(i 
int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: CREATE TABLE part_null_n0 as select * from part +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@part +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null_n0 +POSTHOOK: query: CREATE TABLE part_null_n0 as select * from part +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@part +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null_n0 +POSTHOOK: Lineage: part_null_n0.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_comment SIMPLE [(part)part.FieldSchema(name:p_comment, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_container SIMPLE [(part)part.FieldSchema(name:p_container, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_partkey SIMPLE [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_retailprice SIMPLE [(part)part.FieldSchema(name:p_retailprice, type:double, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_type SIMPLE [(part)part.FieldSchema(name:p_type, type:string, comment:null), ] +PREHOOK: query: insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@part_null_n0 +POSTHOOK: query: insert into part_null_n0 
values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@part_null_n0 +POSTHOOK: Lineage: part_null_n0.p_brand EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_comment EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_container EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_mfgr EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_name EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_partkey EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_retailprice EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_size EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_type EXPRESSION [] +PREHOOK: query: explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveSemiJoin(condition=[=($0, $1)], joinType=[inner]) + HiveProject(p_partkey=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(p_partkey=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +PREHOOK: query: select count(*) from part where p_partkey = ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey = ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey > ANY 
(select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey > ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[OR(AND(>($0, $1), $2), AND(>($0, $1), $2, IS NOT TRUE(>($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey > ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey > ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +25 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey < ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey < ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[OR(AND(<($0, $1), $2), AND(<($0, $1), $2, IS NOT TRUE(<($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey < ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey < ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +25 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[OR(AND(>=($0, $1), $2), AND(>=($0, $1), $2, IS NOT TRUE(>=($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part 
+#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey <= ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey <= ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[OR(AND(<=($0, $1), $2), AND(<=($0, $1), $2, IS NOT TRUE(<=($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey <= ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey <= ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 +PREHOOK: query: explain cbo select count(*) from part where p_partkey = SOME(select min(p_partkey) from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey = SOME(select min(p_partkey) 
from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject($f0=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[min($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +PREHOOK: query: select count(*) from part where p_partkey = SOME(select min(p_partkey) from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey = SOME(select min(p_partkey) from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +1 +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[OR(AND(<($0, $1), $2), AND(<($0, $1), $2, IS NOT TRUE(<($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_size=[$5]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($1)], c=[COUNT()], d=[COUNT($1)]) + HiveProject(p_partkey=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], 
agg#0=[max($5)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +25 +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_size < ANY (select max(null) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_size < ANY (select max(null) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from part where p_partkey = ANY(select i from tempty) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@tempty +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey = ANY(select i from tempty) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@tempty +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from part where p_partkey = ANY (select p_partkey from part_null_n0) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey = ANY (select p_partkey from part_null_n0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26 
+Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where (p_size= ANY (select p_partkey from part_null_n0)) is null +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where (p_size= ANY (select p_partkey from part_null_n0)) is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select count(*) from part where (p_partkey = ANY (select p_partkey from part_null_n0 where p_partkey is null)) is null +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where (p_partkey = ANY (select p_partkey from part_null_n0 where p_partkey is null)) is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey > ANY (select max(p_partkey) from part_null_n0) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey > ANY (select max(p_partkey) from part_null_n0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in 
Stage 'Reducer 4' is a cross product +PREHOOK: query: explain cbo select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveProject($f0=[$3]) + HiveJoin(condition=[OR(AND(>($3, $0), $1), AND(>($3, $0), $1, IS NOT TRUE(>($3, $0)), $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)]) + HiveProject(p_partkey=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part) + AND p_size = ANY (select p_size from part group by p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where 
p_partkey >= ANY (select p_partkey from part) + AND p_size = ANY (select p_size from part group by p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(=($1, $2), OR(AND(>=($0, $3), <>($4, 0)), AND(>=($0, $3), <>($4, 0), IS NOT TRUE(>=($0, $3)), <=($4, $5))))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0], p_size=[$5]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_size=[$0]) + HiveAggregate(group=[{5}]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], c=[$1], d=[$2]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part) + AND p_size = ANY (select p_size from part group by p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part) + AND p_size = ANY (select p_size from part group by p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey + >= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by 
p_size)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey + >= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by p_size)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[OR(AND(>=($0, $1), $2), AND(>=($0, $1), $2, IS NOT TRUE(>=($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) + HiveJoin(condition=[OR(AND(>=($1, $2), $3), AND(>=($1, $2), $3, IS NOT TRUE(>=($1, $2)), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0], p_size=[$5]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) + HiveProject(p_size=[$0]) + HiveAggregate(group=[{5}]) + HiveTableScan(table=[[default, part_null_n0]], table:alias=[part_null_n0]) + +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey + >= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by p_size)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey + >= ANY (select p_partkey from part where 
p_size >= ANY(select p_size from part_null_n0 group by p_size)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26