diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 6f01da0..4fce1ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -213,6 +213,7 @@ system.registerGenericUDF("ceiling", GenericUDFCeil.class); system.registerUDF("rand", UDFRand.class, false); system.registerGenericUDF("abs", GenericUDFAbs.class); + system.registerGenericUDF("sq_count_check", GenericUDFSQCountCheck.class); system.registerGenericUDF("pmod", GenericUDFPosMod.class); system.registerUDF("ln", UDFLn.class, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index a373cdd..378cbbb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -227,8 +227,8 @@ private void setCurrent(RelNode root, LogicalCorrelate corRel) { private RelNode decorrelate(RelNode root) { // first adjust count() expression if any HepProgram program = HepProgram.builder() - .addRuleInstance(new AdjustProjectForCountAggregateRule(false)) - .addRuleInstance(new AdjustProjectForCountAggregateRule(true)) + //.addRuleInstance(new AdjustProjectForCountAggregateRule(false)) + //.addRuleInstance(new AdjustProjectForCountAggregateRule(true)) .addRuleInstance(FilterJoinRule.FILTER_ON_JOIN) .addRuleInstance(FilterProjectTransposeRule.INSTANCE) .addRuleInstance(FilterCorrelateRule.INSTANCE) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index f1e8ebd..73c807b 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -25,7 +25,6 @@ import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.LogicVisitor; import org.apache.calcite.rex.RexInputRef; @@ -33,8 +32,13 @@ import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.ImmutableBitSet; @@ -49,6 +53,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveSubQRemoveRelBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; +import scala.reflect.internal.Trees; /** * NOTE: this rule is replicated from Calcite's SubqueryRemoveRule @@ -109,11 +115,33 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, HiveSubQRemoveRelBuilder builder, int inputCount, int offset) { switch (e.getKind()) { case SCALAR_QUERY: + final List parentQueryFields = new ArrayList<>(); + parentQueryFields.addAll(builder.fields()); + builder.push(e.rel); final RelMetadataQuery mq = RelMetadataQuery.instance(); final Boolean unique = 
mq.areColumnsUnique(builder.peek(), ImmutableBitSet.of()); - if (unique == null || !unique) { + //TODO: need to add check to determine if subquery expression + // returns single row/column + builder.aggregate(builder.groupKey(), + builder.count(false, "cnt")); + + SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, + InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); + builder.project(builder.call(countCheck, builder.field("cnt"))); + + if( !variablesSet.isEmpty()) + { + //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + } + else + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); + //offset += 1; + //builder.project(parentQueryFields); + builder.push(e.rel); + if (/*unique == null || !unique*/ false) { builder.aggregate(builder.groupKey(), builder.aggregateCall(SqlStdOperatorTable.SINGLE_VALUE, false, null, null, builder.field(0))); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 8d2e535..ca1dfd1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -178,7 +178,7 @@ public RexNode convert(ExprNodeDesc expr) throws SemanticException { } private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws SemanticException { - if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.IN) { + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.IN ) { /* * Check.5.h :: For In and Not In the SubQuery must implicitly or * explicitly only contain one select item. 
@@ -199,9 +199,19 @@ else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.EXISTS) { RexNode subQueryNode = RexSubQuery.exists(subQueryDesc.getRexSubQuery()); return subQueryNode; } + else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SCALAR){ + if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "SubQuery can contain only 1 item in Select List.")); + } + //create RexSubQuery node + RexNode rexSubQuery = RexSubQuery.scalar(subQueryDesc.getRexSubQuery()); + return rexSubQuery; + } + else { throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "Currently only IN and EXISTS type of subqueries are supported")); + "Invalid subquery")); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 9f1b9d5..b0a3282 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -249,7 +249,6 @@ import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; public class CalcitePlanner extends SemanticAnalyzer { @@ -2275,7 +2274,6 @@ private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel, ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); ASTNode subQueryAST = subQueries.get(i); - SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, @@ -2298,7 +2296,7 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean Map aliasToRel) throws SemanticException { //disallow subqueries which HIVE doesn't currently support - subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, 
aliasToRel); + //subqueryRestritionCheck(qb, node, srcRel, forHavingClause, aliasToRel); Deque stack = new ArrayDeque(); stack.push(node); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index cd9adfc..b90210c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -397,12 +397,13 @@ atomExpression | floorExpression | caseExpression | whenExpression + | (LPAREN KW_SELECT)=> (subQueryExpression) + -> ^(TOK_SUBQUERY_EXPR TOK_SUBQUERY_OP subQueryExpression) | (functionName LPAREN) => function | tableOrColumn | LPAREN! expression RPAREN! ; - precedenceFieldExpression : atomExpression ((LSQUARE^ expression RSQUARE!) | (DOT^ identifier))* @@ -531,7 +532,7 @@ precedenceEqualExpressionSingle -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpressionSingle $notExpr)) | (precedenceEqualOperator equalExpr=precedenceBitwiseOrExpression) -> ^(precedenceEqualOperator $precedenceEqualExpressionSingle $equalExpr) - | (KW_NOT KW_IN LPAREN KW_SELECT)=> (KW_NOT KW_IN subQueryExpression) + | (KW_NOT KW_IN LPAREN KW_SELECT)=> (KW_NOT KW_IN subQueryExpression) -> ^(KW_NOT ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpressionSingle)) | (KW_NOT KW_IN expressions) -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionSingle expressions)) @@ -544,7 +545,8 @@ precedenceEqualExpressionSingle | ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) -> ^(TOK_FUNCTION Identifier["between"] KW_FALSE $left $min $max) )* - | (KW_EXISTS LPAREN KW_SELECT)=> (KW_EXISTS subQueryExpression) -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_EXISTS) subQueryExpression) + | (KW_EXISTS LPAREN KW_SELECT)=> (KW_EXISTS subQueryExpression) + -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_EXISTS) subQueryExpression) ; expressions diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 6c30efd..4aff56b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.lib.ExpressionWalker; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -1414,10 +1415,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ASTNode subqueryOp = (ASTNode) expr.getChild(0); - boolean isIN = (subqueryOp.getChild(0).getType() == HiveParser.KW_IN + boolean isIN = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_IN || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTIN); - boolean isEXISTS = (subqueryOp.getChild(0).getType() == HiveParser.KW_EXISTS + boolean isEXISTS = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_EXISTS || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTEXISTS); + boolean isScalar = subqueryOp.getChildCount() == 0 ; // subqueryToRelNode might be null if subquery expression anywhere other than // as expected in filter (where/having). 
We should throw an appropriate error @@ -1430,25 +1432,38 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, "Where and Having Clause predicates")); } + RelNode subqueryRel = subqueryToRelNode.get(expr); + //For now because subquery is only supported in filter // we will create subquery expression of boolean type if(isEXISTS) { - return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr), + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryRel, ExprNodeSubQueryDesc.SubqueryType.EXISTS); } - if(isIN) { + else if(isIN) { assert(nodeOutputs[2] != null); ExprNodeDesc lhs = (ExprNodeDesc)nodeOutputs[2]; - return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr), + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryRel, ExprNodeSubQueryDesc.SubqueryType.IN, lhs); } + else if(isScalar){ + // only single subquery expr is supported + if(subqueryRel.getRowType().getFieldCount() != 1) { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "More than one column expression in subquery")); + } + // figure out subquery expression column's type + TypeInfo subExprType = TypeConverter.convert(subqueryRel.getRowType().getFieldList().get(0).getType()); + return new ExprNodeSubQueryDesc(subExprType, subqueryRel, + ExprNodeSubQueryDesc.SubqueryType.SCALAR); + } /* * Restriction.1.h :: SubQueries only supported in the SQL Where Clause. 
*/ ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode, - "Currently only IN & EXISTS SubQuery expressions are allowed"), - sqNode); + "Currently only IN & EXISTS SubQuery expressions are allowed"), + sqNode); return null; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java index aec331b..462e730 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java @@ -35,11 +35,9 @@ public static enum SubqueryType{ IN, EXISTS, + SCALAR }; - public static final int IN=1; - public static final int EXISTS=2; - /** * RexNode corresponding to subquery. */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java new file mode 100644 index 0000000..9d9b5e0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncAbsDecimalToDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncAbsDoubleToDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncAbsLongToLong; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; + +/** + * GenericUDFSQCountCheck. + * Internal UDF applied to the row count of a scalar subquery: evaluate() returns null for a null count, passes a count of 0 or 1 through as a long, and throws UDFArgumentException when the count is greater than 1. + * It intentionally declares no VectorizedExpressions annotation; mapping it to the FuncAbs* vector classes would substitute abs() for this check under vectorized execution. + */ +@Description(name = "sq_count_check", + value = "_FUNC_(x) - Internal check on scalar subquery expression to make sure atmost one row is returned", + extended = "For internal use only") +public class GenericUDFSQCountCheck extends GenericUDF { + private final LongWritable resultLong = new LongWritable(); + private transient Converter[] converters = new Converter[1]; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "Invalid scalar subquery expression. 
Subquery count check expected one argument but received: " + arguments.length); + } + + converters[0] = ObjectInspectorConverters.getConverter(arguments[0], + PrimitiveObjectInspectorFactory.writableLongObjectInspector); + + ObjectInspector outputOI = null; + outputOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector; + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + Object valObject = arguments[0].get(); + if (valObject == null) { + return null; + } + Long val = getLongValue(arguments, 0, converters); + assert(val >= 0); + if(val > 1) { + throw new UDFArgumentException( + " Scalar subquery expression returns more than one row."); + } + + resultLong.set(val); + return resultLong; + } + + @Override + protected String getFuncName() { + return "sq_count_check"; + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString(getFuncName(), children); + } + +} diff --git a/ql/src/test/queries/clientnegative/scalar_sub1.q b/ql/src/test/queries/clientnegative/scalar_sub1.q new file mode 100644 index 0000000..14b68b9 --- /dev/null +++ b/ql/src/test/queries/clientnegative/scalar_sub1.q @@ -0,0 +1,2 @@ +set hive.mapred.mode=nonstrict; +select p_name from part where p_size > (select p_size from part); diff --git a/ql/src/test/queries/clientpositive/perf/query1.q b/ql/src/test/queries/clientpositive/perf/query1.q new file mode 100644 index 0000000..988eda6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query1.q @@ -0,0 +1,22 @@ +explain with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from 
customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query23.q b/ql/src/test/queries/clientpositive/perf/query23.q index e8ebd86..631d5a0 100644 --- a/ql/src/test/queries/clientpositive/perf/query23.q +++ b/ql/src/test/queries/clientpositive/perf/query23.q @@ -26,23 +26,26 @@ explain with frequent_ss_items as ,customer where ss_customer_sk = c_customer_sk group by c_customer_sk - having sum(ss_quantity*ss_sales_price) > (95/100.0)) + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) select sum(sales) - from (select cs_quantity*cs_list_price sales + from ((select cs_quantity*cs_list_price sales from catalog_sales ,date_dim where d_year = 1999 and d_moy = 1 and cs_sold_date_sk = d_date_sk and cs_item_sk in (select item_sk from frequent_ss_items) - and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) union all - select ws_quantity*ws_list_price sales + (select ws_quantity*ws_list_price sales from web_sales ,date_dim where d_year = 1999 and d_moy = 1 and ws_sold_date_sk = d_date_sk and ws_item_sk in (select item_sk from frequent_ss_items) - and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query30.q b/ql/src/test/queries/clientpositive/perf/query30.q new file mode 100644 index 0000000..7fa5752 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query30.q @@ -0,0 +1,28 @@ +explain with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + 
,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query41.q b/ql/src/test/queries/clientpositive/perf/query41.q new file mode 100644 index 0000000..3c935eb --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query41.q @@ -0,0 +1,49 @@ +explain select distinct(i_product_name) + from item i1 + where i_manufact_id between 742 and 742+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'orchid' or i_color = 'papaya') and + (i_units = 'Pound' or i_units = 'Lb') and + (i_size = 'petite' or i_size = 'medium') + ) or + (i_category = 'Women' and + (i_color = 'burlywood' or i_color = 'navy') and + (i_units = 'Bundle' or i_units = 'Each') and + (i_size = 'N/A' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'bisque' or i_color = 'azure') and + (i_units = 'N/A' or i_units = 'Tsp') and + (i_size = 'small' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'chocolate' or i_color = 'cornflower') and + (i_units = 'Bunch' or i_units = 'Gross') and + (i_size = 
'petite' or i_size = 'medium') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'salmon' or i_color = 'midnight') and + (i_units = 'Oz' or i_units = 'Box') and + (i_size = 'petite' or i_size = 'medium') + ) or + (i_category = 'Women' and + (i_color = 'snow' or i_color = 'steel') and + (i_units = 'Carton' or i_units = 'Tbl') and + (i_size = 'N/A' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'purple' or i_color = 'gainsboro') and + (i_units = 'Dram' or i_units = 'Unknown') and + (i_size = 'small' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'metallic' or i_color = 'forest') and + (i_units = 'Gram' or i_units = 'Ounce') and + (i_size = 'petite' or i_size = 'medium') + )))) > 0 + order by i_product_name + limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query6.q b/ql/src/test/queries/clientpositive/perf/query6.q new file mode 100644 index 0000000..8e4a7d3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query6.q @@ -0,0 +1,25 @@ +set hive.mapred.mode=nonstrict; + +explain select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query81.q b/ql/src/test/queries/clientpositive/perf/query81.q new file mode 100644 index 0000000..a9a458e --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query81.q @@ -0,0 +1,28 @@ +explain with customer_total_return as + (select cr_returning_customer_sk as 
ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100; diff --git a/ql/src/test/results/clientnegative/scalar_sub1.q.out b/ql/src/test/results/clientnegative/scalar_sub1.q.out new file mode 100644 index 0000000..354ac74 --- /dev/null +++ b/ql/src/test/results/clientnegative/scalar_sub1.q.out @@ -0,0 +1,7 @@ +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select p_name from part where p_size > (select p_size from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask diff --git a/ql/src/test/results/clientpositive/perf/query1.q.out b/ql/src/test/results/clientpositive/perf/query1.q.out new file mode 100644 index 0000000..792f50c --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/query1.q.out @@ -0,0 +1,397 @@ +PREHOOK: query: explain with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 25 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 26 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE) +Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) +Reducer 29 <- Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE) +Reducer 31 <- Reducer 30 (SIMPLE_EDGE) +Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) +Reducer 35 <- Reducer 34 (SIMPLE_EDGE) +Reducer 36 <- Map 40 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) +Reducer 37 <- Map 41 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) +Reducer 38 <- Reducer 37 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 31 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 8 + File Output Operator [FS_149] + Limit [LIM_148] (rows=100 width=860) + Number of rows:100 + Select Operator [SEL_147] (rows=35493334 width=860) + Output:["_col0"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_146] + Select Operator [SEL_145] (rows=35493334 width=860) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_239] (rows=35493334 width=860) + Conds:RS_142._col0=RS_143._col0(Left Outer),Output:["_col1"] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_143] + PartitionCols:_col0 + Group By Operator [GBY_140] (rows=24200000 width=860) + Output:["_col0"],keys:KEY._col0 + <-Reducer 30 
[SIMPLE_EDGE] + SHUFFLE [RS_139] + PartitionCols:_col0 + Group By Operator [GBY_138] (rows=48400001 width=860) + Output:["_col0"],keys:_col3 + Merge Join Operator [MERGEJOIN_238] (rows=48400001 width=860) + Conds:RS_134._col1=RS_135._col0(Inner),Output:["_col3"] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_134] + PartitionCols:_col1 + Select Operator [SEL_102] (rows=31675133 width=77) + Output:["_col1"] + Group By Operator [GBY_101] (rows=31675133 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_100] + PartitionCols:_col0, _col1 + Group By Operator [GBY_99] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Select Operator [SEL_98] (rows=63350266 width=77) + Output:["_col2","_col1","_col3"] + Merge Join Operator [MERGEJOIN_232] (rows=63350266 width=77) + Conds:RS_95._col0=RS_96._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_95] + PartitionCols:_col0 + Select Operator [SEL_91] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_219] (rows=57591150 width=77) + predicate:sr_returned_date_sk is not null + TableScan [TS_89] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] + <-Map 32 [SIMPLE_EDGE] + SHUFFLE [RS_96] + PartitionCols:_col0 + Select Operator [SEL_94] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_220] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_92] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 38 [SIMPLE_EDGE] + SHUFFLE [RS_135] + PartitionCols:_col0 + Group By Operator [GBY_132] (rows=44000000 width=860) + Output:["_col0"],keys:KEY._col0 + <-Reducer 37 [SIMPLE_EDGE] + SHUFFLE [RS_131] + 
PartitionCols:_col0 + Group By Operator [GBY_130] (rows=88000001 width=860) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_235] (rows=88000001 width=860) + Conds:RS_126._col0=RS_127._col0(Inner),Output:["_col1"] + <-Map 41 [SIMPLE_EDGE] + SHUFFLE [RS_127] + PartitionCols:_col0 + Select Operator [SEL_122] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_224] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_120] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_126] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_234] (rows=34842647 width=77) + Conds:RS_123._col1=RS_124._col0(Inner),Output:["_col0","_col1"] + <-Map 40 [SIMPLE_EDGE] + SHUFFLE [RS_124] + PartitionCols:_col0 + Select Operator [SEL_119] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_223] (rows=852 width=1910) + predicate:((s_state = 'NM') and s_store_sk is not null) + TableScan [TS_117] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_123] + PartitionCols:_col1 + Select Operator [SEL_116] (rows=31675133 width=77) + Output:["_col0","_col1"] + Group By Operator [GBY_115] (rows=31675133 width=77) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_114] + PartitionCols:_col0, _col1 + Group By Operator [GBY_113] (rows=63350266 width=77) + Output:["_col0","_col1"],keys:_col2, _col1 + Select Operator [SEL_112] (rows=63350266 width=77) + Output:["_col2","_col1"] + Merge Join Operator [MERGEJOIN_233] (rows=63350266 width=77) + Conds:RS_109._col0=RS_110._col0(Inner),Output:["_col1","_col2"] + <-Map 33 [SIMPLE_EDGE] + SHUFFLE [RS_109] + PartitionCols:_col0 + Select Operator [SEL_105] (rows=57591150 width=77) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_221] (rows=57591150 width=77) + 
predicate:(sr_returned_date_sk is not null and sr_store_sk is not null and sr_customer_sk is not null) + TableScan [TS_103] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk"] + <-Map 39 [SIMPLE_EDGE] + SHUFFLE [RS_110] + PartitionCols:_col0 + Select Operator [SEL_108] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_222] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_106] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_142] + PartitionCols:_col0 + Select Operator [SEL_88] (rows=32266667 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_87] (rows=32266667 width=860) + predicate:(_col2 > _col7) + Merge Join Operator [MERGEJOIN_237] (rows=96800003 width=860) + Conds:RS_84._col1=RS_85._col1(Inner),Output:["_col1","_col2","_col6","_col7"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col1 + Select Operator [SEL_77] (rows=12100000 width=860) + Output:["_col0","_col1"] + Group By Operator [GBY_76] (rows=12100000 width=860) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Group By Operator [GBY_71] (rows=24200000 width=860) + Output:["_col0"],keys:KEY._col0 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col0 + Group By Operator [GBY_69] (rows=48400001 width=860) + Output:["_col0"],keys:_col3 + Merge Join Operator [MERGEJOIN_236] (rows=48400001 width=860) + Conds:RS_65._col1=RS_66._col0(Inner),Output:["_col3"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col1 + Select Operator [SEL_33] (rows=31675133 width=77) + Output:["_col1"] + Group By Operator [GBY_32] (rows=31675133 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_31] + 
PartitionCols:_col0, _col1 + Group By Operator [GBY_30] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Select Operator [SEL_29] (rows=63350266 width=77) + Output:["_col2","_col1","_col3"] + Merge Join Operator [MERGEJOIN_228] (rows=63350266 width=77) + Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Select Operator [SEL_22] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_213] (rows=57591150 width=77) + predicate:sr_returned_date_sk is not null + TableScan [TS_20] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_25] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_214] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_23] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Group By Operator [GBY_63] (rows=44000000 width=860) + Output:["_col0"],keys:KEY._col0 + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col0 + Group By Operator [GBY_61] (rows=88000001 width=860) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_231] (rows=88000001 width=860) + Conds:RS_57._col0=RS_58._col0(Inner),Output:["_col1"] + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0 + Select Operator [SEL_53] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_218] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_51] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 21 [SIMPLE_EDGE] + 
SHUFFLE [RS_57] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_230] (rows=34842647 width=77) + Conds:RS_54._col1=RS_55._col0(Inner),Output:["_col0","_col1"] + <-Map 25 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Select Operator [SEL_50] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_217] (rows=852 width=1910) + predicate:((s_state = 'NM') and s_store_sk is not null) + TableScan [TS_48] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col1 + Select Operator [SEL_47] (rows=31675133 width=77) + Output:["_col0","_col1"] + Group By Operator [GBY_46] (rows=31675133 width=77) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0, _col1 + Group By Operator [GBY_44] (rows=63350266 width=77) + Output:["_col0","_col1"],keys:_col2, _col1 + Select Operator [SEL_43] (rows=63350266 width=77) + Output:["_col2","_col1"] + Merge Join Operator [MERGEJOIN_229] (rows=63350266 width=77) + Conds:RS_40._col0=RS_41._col0(Inner),Output:["_col1","_col2"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Select Operator [SEL_36] (rows=57591150 width=77) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_215] (rows=57591150 width=77) + predicate:(sr_returned_date_sk is not null and sr_store_sk is not null and sr_customer_sk is not null) + TableScan [TS_34] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk"] + <-Map 24 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Select Operator [SEL_39] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_216] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_37] (rows=73049 width=1119) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_84] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_227] (rows=88000001 width=860) + Conds:RS_81._col0=RS_82._col0(Inner),Output:["_col1","_col2","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_82] + PartitionCols:_col0 + Select Operator [SEL_19] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_212] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_17] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_81] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_226] (rows=34842647 width=77) + Conds:RS_78._col1=RS_79._col0(Inner),Output:["_col0","_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_79] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_211] (rows=852 width=1910) + predicate:((s_state = 'NM') and s_store_sk is not null) + TableScan [TS_14] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_78] + PartitionCols:_col1 + Select Operator [SEL_13] (rows=31675133 width=77) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_12] (rows=31675133 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Group By Operator [GBY_10] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Select Operator [SEL_9] (rows=63350266 width=77) + Output:["_col2","_col1","_col3"] + Merge Join Operator [MERGEJOIN_225] (rows=63350266 width=77) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_6] + 
PartitionCols:_col0 + Select Operator [SEL_2] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_209] (rows=57591150 width=77) + predicate:(sr_returned_date_sk is not null and sr_store_sk is not null and sr_customer_sk is not null) + TableScan [TS_0] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_210] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out b/ql/src/test/results/clientpositive/perf/query23.q.out index 6d4cfca..6410adb 100644 --- a/ql/src/test/results/clientpositive/perf/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/query23.q.out @@ -1,3 +1,7 @@ +Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 17' is a cross product +Warning: Shuffle Join MERGEJOIN[370][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[372][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 48' is a cross product +Warning: Shuffle Join MERGEJOIN[373][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 49' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt from store_sales @@ -24,25 +28,28 @@ PREHOOK: query: explain with frequent_ss_items as ,customer where ss_customer_sk = c_customer_sk group by c_customer_sk - having sum(ss_quantity*ss_sales_price) > (95/100.0)) + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) select sum(sales) - 
from (select cs_quantity*cs_list_price sales + from ((select cs_quantity*cs_list_price sales from catalog_sales ,date_dim where d_year = 1999 and d_moy = 1 and cs_sold_date_sk = d_date_sk and cs_item_sk in (select item_sk from frequent_ss_items) - and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) union all - select ws_quantity*ws_list_price sales + (select ws_quantity*ws_list_price sales from web_sales ,date_dim where d_year = 1999 and d_moy = 1 and ws_sold_date_sk = d_date_sk and ws_item_sk in (select item_sk from frequent_ss_items) - and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y limit 100 PREHOOK: type: QUERY POSTHOOK: query: explain with frequent_ss_items as @@ -71,25 +78,28 @@ POSTHOOK: query: explain with frequent_ss_items as ,customer where ss_customer_sk = c_customer_sk group by c_customer_sk - having sum(ss_quantity*ss_sales_price) > (95/100.0)) + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) select sum(sales) - from (select cs_quantity*cs_list_price sales + from ((select cs_quantity*cs_list_price sales from catalog_sales ,date_dim where d_year = 1999 and d_moy = 1 and cs_sold_date_sk = d_date_sk and cs_item_sk in (select item_sk from frequent_ss_items) - and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) union all - select ws_quantity*ws_list_price sales + (select ws_quantity*ws_list_price sales from web_sales ,date_dim where d_year = 1999 and d_moy = 1 and ws_sold_date_sk = d_date_sk and ws_item_sk in (select item_sk from frequent_ss_items) - and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y limit 100 POSTHOOK: 
type: QUERY Plan optimized by CBO. @@ -97,20 +107,40 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 25 <- Map 28 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 27 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (SIMPLE_EDGE) +Reducer 29 <- Map 28 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 29 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) +Reducer 30 <- Map 33 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) Reducer 31 <- Reducer 30 (SIMPLE_EDGE) -Reducer 4 <- Reducer 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 35 <- Map 34 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE) +Reducer 36 <- Reducer 35 (SIMPLE_EDGE), Reducer 42 (SIMPLE_EDGE) +Reducer 37 <- Reducer 36 (SIMPLE_EDGE), Reducer 50 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 4 <- Reducer 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 40 <- Map 39 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) +Reducer 41 <- Map 44 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) +Reducer 42 <- Reducer 41 (SIMPLE_EDGE) +Reducer 46 <- Map 
45 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE) +Reducer 47 <- Reducer 46 (SIMPLE_EDGE) +Reducer 48 <- Reducer 47 (SIMPLE_EDGE), Reducer 56 (SIMPLE_EDGE) +Reducer 49 <- Reducer 48 (SIMPLE_EDGE), Reducer 62 (SIMPLE_EDGE) +Reducer 50 <- Reducer 49 (SIMPLE_EDGE) +Reducer 53 <- Map 52 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE) +Reducer 54 <- Map 58 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE) +Reducer 55 <- Reducer 54 (SIMPLE_EDGE) +Reducer 56 <- Reducer 55 (SIMPLE_EDGE) Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 60 <- Map 59 (SIMPLE_EDGE), Map 63 (SIMPLE_EDGE) +Reducer 61 <- Map 64 (SIMPLE_EDGE), Reducer 60 (SIMPLE_EDGE) +Reducer 62 <- Reducer 61 (SIMPLE_EDGE) Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 @@ -118,193 +148,419 @@ Stage-0 limit:100 Stage-1 Reducer 6 - File Output Operator [FS_136] - Limit [LIM_135] (rows=1 width=112) + File Output Operator [FS_248] + Limit [LIM_247] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_133] (rows=1 width=112) + Group By Operator [GBY_245] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [SIMPLE_EDGE] - <-Reducer 21 [CONTAINS] - Reduce Output Operator [RS_132] - Group By Operator [GBY_131] (rows=1 width=112) + <-Reducer 37 [CONTAINS] + Reduce Output Operator [RS_244] + Group By Operator [GBY_243] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_127] (rows=191667562 width=135) + Select Operator [SEL_239] (rows=20239413373878016 width=186) Output:["_col0"] - Merge Join Operator [MERGEJOIN_206] (rows=191667562 width=135) - Conds:RS_124._col2=RS_125._col0(Inner),Output:["_col3","_col4"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_124] + Merge Join Operator [MERGEJOIN_374] (rows=20239413373878016 width=186) + Conds:RS_236._col2=RS_237._col0(Inner),Output:["_col3","_col4"] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_236] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_204] (rows=174243235 width=135) - 
Conds:RS_121._col1=RS_122._col0(Inner),Output:["_col2","_col3","_col4"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_121] + Merge Join Operator [MERGEJOIN_368] (rows=174243235 width=135) + Conds:RS_233._col1=RS_234._col0(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_233] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_199] (rows=158402938 width=135) - Conds:RS_118._col0=RS_119._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_118] + Merge Join Operator [MERGEJOIN_359] (rows=158402938 width=135) + Conds:RS_230._col0=RS_231._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 34 [SIMPLE_EDGE] + SHUFFLE [RS_230] PartitionCols:_col0 - Select Operator [SEL_66] (rows=144002668 width=135) + Select Operator [SEL_122] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_186] (rows=144002668 width=135) + Filter Operator [FIL_336] (rows=144002668 width=135) predicate:ws_sold_date_sk is not null - TableScan [TS_64] (rows=144002668 width=135) + TableScan [TS_120] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] - <-Map 22 [SIMPLE_EDGE] - SHUFFLE [RS_119] + <-Map 38 [SIMPLE_EDGE] + SHUFFLE [RS_231] PartitionCols:_col0 - Select Operator [SEL_69] (rows=18262 width=1119) + Select Operator [SEL_125] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_187] (rows=18262 width=1119) + Filter Operator [FIL_337] (rows=18262 width=1119) predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) - TableScan [TS_67] (rows=73049 width=1119) + TableScan [TS_123] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_122] + <-Reducer 42 [SIMPLE_EDGE] + SHUFFLE [RS_234] PartitionCols:_col0 - Group By Operator [GBY_95] 
(rows=58079562 width=88) + Group By Operator [GBY_151] (rows=58079562 width=88) Output:["_col0"],keys:_col1 - Select Operator [SEL_91] (rows=116159124 width=88) + Select Operator [SEL_147] (rows=116159124 width=88) Output:["_col1"] - Filter Operator [FIL_90] (rows=116159124 width=88) + Filter Operator [FIL_146] (rows=116159124 width=88) predicate:(_col3 > 4) - Select Operator [SEL_193] (rows=348477374 width=88) + Select Operator [SEL_350] (rows=348477374 width=88) Output:["_col0","_col3"] - Group By Operator [GBY_89] (rows=348477374 width=88) + Group By Operator [GBY_145] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_88] + <-Reducer 41 [SIMPLE_EDGE] + SHUFFLE [RS_144] PartitionCols:_col0 - Group By Operator [GBY_87] (rows=696954748 width=88) + Group By Operator [GBY_143] (rows=696954748 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_85] (rows=696954748 width=88) + Select Operator [SEL_141] (rows=696954748 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_201] (rows=696954748 width=88) - Conds:RS_82._col1=RS_83._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 28 [SIMPLE_EDGE] - SHUFFLE [RS_83] + Merge Join Operator [MERGEJOIN_361] (rows=696954748 width=88) + Conds:RS_138._col1=RS_139._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 44 [SIMPLE_EDGE] + SHUFFLE [RS_139] PartitionCols:_col0 - Select Operator [SEL_78] (rows=462000 width=1436) + Select Operator [SEL_134] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_190] (rows=462000 width=1436) + Filter Operator [FIL_340] (rows=462000 width=1436) predicate:i_item_sk is not null - TableScan [TS_76] (rows=462000 width=1436) + TableScan [TS_132] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] - 
<-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_82] + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_138] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_200] (rows=633595212 width=88) - Conds:RS_79._col0=RS_80._col0(Inner),Output:["_col1","_col3"] - <-Map 23 [SIMPLE_EDGE] - SHUFFLE [RS_79] + Merge Join Operator [MERGEJOIN_360] (rows=633595212 width=88) + Conds:RS_135._col0=RS_136._col0(Inner),Output:["_col1","_col3"] + <-Map 39 [SIMPLE_EDGE] + SHUFFLE [RS_135] PartitionCols:_col0 - Select Operator [SEL_72] (rows=575995635 width=88) + Select Operator [SEL_128] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_188] (rows=575995635 width=88) + Filter Operator [FIL_338] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null) - TableScan [TS_70] (rows=575995635 width=88) + TableScan [TS_126] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"] - <-Map 27 [SIMPLE_EDGE] - SHUFFLE [RS_80] + <-Map 43 [SIMPLE_EDGE] + SHUFFLE [RS_136] PartitionCols:_col0 - Select Operator [SEL_75] (rows=36525 width=1119) + Select Operator [SEL_131] (rows=36525 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_189] (rows=36525 width=1119) + Filter Operator [FIL_339] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_73] (rows=73049 width=1119) + TableScan [TS_129] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_125] + <-Reducer 50 [SIMPLE_EDGE] + SHUFFLE [RS_237] PartitionCols:_col0 - Group By Operator [GBY_116] (rows=52799601 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_112] (rows=105599202 width=88) - Output:["_col0"] - Filter Operator [FIL_111] (rows=105599202 width=88) - predicate:(_col1 > 0.95) - Group By Operator [GBY_110] (rows=316797606 width=88) - 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_109] - PartitionCols:_col0 - Group By Operator [GBY_108] (rows=633595212 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_106] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_202] (rows=633595212 width=88) - Conds:RS_103._col0=RS_104._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 29 [SIMPLE_EDGE] - SHUFFLE [RS_103] - PartitionCols:_col0 - Select Operator [SEL_99] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_191] (rows=575995635 width=88) - predicate:ss_customer_sk is not null - TableScan [TS_97] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 32 [SIMPLE_EDGE] - SHUFFLE [RS_104] - PartitionCols:_col0 - Select Operator [SEL_102] (rows=80000000 width=860) + Group By Operator [GBY_228] (rows=18399466304727774 width=186) + Output:["_col0"],keys:KEY._col0 + <-Reducer 49 [SIMPLE_EDGE] + SHUFFLE [RS_227] + PartitionCols:_col0 + Group By Operator [GBY_226] (rows=36798932609455548 width=186) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_373] (rows=36798932609455548 width=186) + Conds:(Left Outer),Output:["_col0"] + <-Reducer 48 [SIMPLE_EDGE] + SHUFFLE [RS_222] + Select Operator [SEL_199] (rows=105599202 width=97) + Output:["_col0"] + Filter Operator [FIL_198] (rows=105599202 width=97) + predicate:(_col1 > (0.95 * _col2)) + Merge Join Operator [MERGEJOIN_372] (rows=316797606 width=97) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 47 [SIMPLE_EDGE] + SHUFFLE [RS_195] + Group By Operator [GBY_166] (rows=316797606 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE [RS_165] + PartitionCols:_col0 + Group By Operator [GBY_164] 
(rows=633595212 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_162] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_362] (rows=633595212 width=88) + Conds:RS_159._col0=RS_160._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 45 [SIMPLE_EDGE] + SHUFFLE [RS_159] + PartitionCols:_col0 + Select Operator [SEL_155] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_341] (rows=575995635 width=88) + predicate:ss_customer_sk is not null + TableScan [TS_153] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 51 [SIMPLE_EDGE] + SHUFFLE [RS_160] + PartitionCols:_col0 + Select Operator [SEL_158] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_342] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_156] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 56 [SIMPLE_EDGE] + SHUFFLE [RS_196] + Select Operator [SEL_194] (rows=1 width=8) + Output:["_col0"] + Group By Operator [GBY_193] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 55 [SIMPLE_EDGE] + SHUFFLE [RS_192] + Group By Operator [GBY_191] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_188] (rows=348477374 width=88) + Group By Operator [GBY_187] (rows=348477374 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 54 [SIMPLE_EDGE] + SHUFFLE [RS_186] + PartitionCols:_col0 + Group By Operator [GBY_185] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_183] (rows=696954748 width=88) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_364] (rows=696954748 width=88) + Conds:RS_180._col1=RS_181._col0(Inner),Output:["_col6"] + <-Map 58 [SIMPLE_EDGE] + SHUFFLE [RS_181] + PartitionCols:_col0 + Select 
Operator [SEL_176] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_345] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_174] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 53 [SIMPLE_EDGE] + SHUFFLE [RS_180] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_363] (rows=633595212 width=88) + Conds:RS_177._col0=RS_178._col0(Inner),Output:["_col1"] + <-Map 52 [SIMPLE_EDGE] + SHUFFLE [RS_177] + PartitionCols:_col0 + Select Operator [SEL_170] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_343] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_168] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Map 57 [SIMPLE_EDGE] + SHUFFLE [RS_178] + PartitionCols:_col0 + Select Operator [SEL_173] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_344] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_171] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 62 [SIMPLE_EDGE] + SHUFFLE [RS_223] + Select Operator [SEL_220] (rows=348477374 width=88) + Group By Operator [GBY_219] (rows=348477374 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 61 [SIMPLE_EDGE] + SHUFFLE [RS_218] + PartitionCols:_col0 + Group By Operator [GBY_217] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_215] (rows=696954748 width=88) Output:["_col0"] - Filter Operator [FIL_192] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_100] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + Merge Join Operator [MERGEJOIN_366] (rows=696954748 width=88) + 
Conds:RS_212._col1=RS_213._col0(Inner),Output:["_col6"] + <-Map 64 [SIMPLE_EDGE] + SHUFFLE [RS_213] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_348] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_206] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 60 [SIMPLE_EDGE] + SHUFFLE [RS_212] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_365] (rows=633595212 width=88) + Conds:RS_209._col0=RS_210._col0(Inner),Output:["_col1"] + <-Map 59 [SIMPLE_EDGE] + SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_202] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_346] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_200] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Map 63 [SIMPLE_EDGE] + SHUFFLE [RS_210] + PartitionCols:_col0 + Select Operator [SEL_205] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_347] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_203] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_132] - Group By Operator [GBY_131] (rows=1 width=112) + Reduce Output Operator [RS_244] + Group By Operator [GBY_243] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_63] (rows=383314495 width=135) + Select Operator [SEL_119] (rows=20239413373878016 width=186) Output:["_col0"] - Merge Join Operator [MERGEJOIN_205] (rows=383314495 width=135) - Conds:RS_60._col1=RS_61._col0(Inner),Output:["_col3","_col4"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_61] + Merge Join Operator [MERGEJOIN_371] 
(rows=20239413373878016 width=186) + Conds:RS_116._col1=RS_117._col0(Inner),Output:["_col3","_col4"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_52] (rows=52799601 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_48] (rows=105599202 width=88) - Output:["_col0"] - Filter Operator [FIL_47] (rows=105599202 width=88) - predicate:(_col1 > 0.95) - Group By Operator [GBY_46] (rows=316797606 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0 - Group By Operator [GBY_44] (rows=633595212 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_42] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_198] (rows=633595212 width=88) - Conds:RS_39._col0=RS_40._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col0 - Select Operator [SEL_35] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_184] (rows=575995635 width=88) - predicate:ss_customer_sk is not null - TableScan [TS_33] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col0 - Select Operator [SEL_38] (rows=80000000 width=860) + Group By Operator [GBY_108] (rows=18399466304727774 width=186) + Output:["_col0"],keys:KEY._col0 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_107] + PartitionCols:_col0 + Group By Operator [GBY_106] (rows=36798932609455548 width=186) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_370] (rows=36798932609455548 width=186) + Conds:(Left Outer),Output:["_col0"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_102] + Select Operator [SEL_79] (rows=105599202 width=97) + Output:["_col0"] + Filter Operator [FIL_78] 
(rows=105599202 width=97) + predicate:(_col1 > (0.95 * _col2)) + Merge Join Operator [MERGEJOIN_369] (rows=316797606 width=97) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_75] + Group By Operator [GBY_46] (rows=316797606 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Group By Operator [GBY_44] (rows=633595212 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_42] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_354] (rows=633595212 width=88) + Conds:RS_39._col0=RS_40._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_35] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_328] (rows=575995635 width=88) + predicate:ss_customer_sk is not null + TableScan [TS_33] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Select Operator [SEL_38] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_329] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_36] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_76] + Select Operator [SEL_74] (rows=1 width=8) + Output:["_col0"] + Group By Operator [GBY_73] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_72] + Group By Operator [GBY_71] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_68] (rows=348477374 width=88) + Group By Operator [GBY_67] (rows=348477374 width=88) + 
Output:["_col0"],keys:KEY._col0 + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Group By Operator [GBY_65] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_63] (rows=696954748 width=88) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_356] (rows=696954748 width=88) + Conds:RS_60._col1=RS_61._col0(Inner),Output:["_col6"] + <-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0 + Select Operator [SEL_56] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_332] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_54] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_355] (rows=633595212 width=88) + Conds:RS_57._col0=RS_58._col0(Inner),Output:["_col1"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col0 + Select Operator [SEL_50] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_330] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_48] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0 + Select Operator [SEL_53] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_331] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_51] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_103] + Select Operator [SEL_100] (rows=348477374 width=88) + Group By Operator [GBY_99] (rows=348477374 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_98] + PartitionCols:_col0 + Group By 
Operator [GBY_97] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_95] (rows=696954748 width=88) Output:["_col0"] - Filter Operator [FIL_185] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_36] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + Merge Join Operator [MERGEJOIN_358] (rows=696954748 width=88) + Conds:RS_92._col1=RS_93._col0(Inner),Output:["_col6"] + <-Map 33 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col0 + Select Operator [SEL_88] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_335] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_86] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_92] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_357] (rows=633595212 width=88) + Conds:RS_89._col0=RS_90._col0(Inner),Output:["_col1"] + <-Map 28 [SIMPLE_EDGE] + SHUFFLE [RS_89] + PartitionCols:_col0 + Select Operator [SEL_82] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_333] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_80] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Map 32 [SIMPLE_EDGE] + SHUFFLE [RS_90] + PartitionCols:_col0 + Select Operator [SEL_85] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_334] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_83] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_60] + SHUFFLE [RS_116] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_203] (rows=348467716 width=135) - 
Conds:RS_57._col2=RS_58._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_367] (rows=348467716 width=135) + Conds:RS_113._col2=RS_114._col0(Inner),Output:["_col1","_col3","_col4"] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_58] + SHUFFLE [RS_114] PartitionCols:_col0 Group By Operator [GBY_31] (rows=58079562 width=88) Output:["_col0"],keys:_col1 @@ -312,7 +568,7 @@ Stage-0 Output:["_col1"] Filter Operator [FIL_26] (rows=116159124 width=88) predicate:(_col3 > 4) - Select Operator [SEL_194] (rows=348477374 width=88) + Select Operator [SEL_349] (rows=348477374 width=88) Output:["_col0","_col3"] Group By Operator [GBY_25] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 @@ -323,28 +579,28 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 Select Operator [SEL_21] (rows=696954748 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_197] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_353] (rows=696954748 width=88) Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col3","_col5","_col6"] <-Map 13 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_14] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_183] (rows=462000 width=1436) + Filter Operator [FIL_327] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_12] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_196] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_352] (rows=633595212 width=88) Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col3"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 Select Operator [SEL_11] (rows=36525 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_182] 
(rows=36525 width=1119) + Filter Operator [FIL_326] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"] @@ -353,30 +609,30 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_8] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_181] (rows=575995635 width=88) + Filter Operator [FIL_325] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null) TableScan [TS_6] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_57] + SHUFFLE [RS_113] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_195] (rows=316788826 width=135) - Conds:RS_54._col0=RS_55._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_351] (rows=316788826 width=135) + Conds:RS_110._col0=RS_111._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_54] + SHUFFLE [RS_110] PartitionCols:_col0 Select Operator [SEL_2] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_179] (rows=287989836 width=135) + Filter Operator [FIL_323] (rows=287989836 width=135) predicate:cs_sold_date_sk is not null TableScan [TS_0] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_55] + SHUFFLE [RS_111] PartitionCols:_col0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_180] (rows=18262 width=1119) + Filter Operator [FIL_324] (rows=18262 width=1119) predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] diff --git a/ql/src/test/results/clientpositive/perf/query30.q.out b/ql/src/test/results/clientpositive/perf/query30.q.out new file mode 100644 index 0000000..007a126 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query30.q.out @@ -0,0 +1,474 @@ +PREHOOK: query: explain with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + 
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (SIMPLE_EDGE) +Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 29 <- Map 32 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (SIMPLE_EDGE) +Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) +Reducer 35 <- Map 40 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 36 <- Reducer 35 (SIMPLE_EDGE) +Reducer 37 <- Reducer 36 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) +Reducer 38 <- Reducer 37 (SIMPLE_EDGE) +Reducer 4 <- Reducer 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 42 <- Map 41 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) +Reducer 43 <- Reducer 42 (SIMPLE_EDGE), Reducer 49 (SIMPLE_EDGE) 
+Reducer 44 <- Reducer 43 (SIMPLE_EDGE) +Reducer 47 <- Map 46 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE) +Reducer 48 <- Map 51 (SIMPLE_EDGE), Reducer 47 (SIMPLE_EDGE) +Reducer 49 <- Reducer 48 (SIMPLE_EDGE) +Reducer 5 <- Reducer 38 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 + File Output Operator [FS_179] + Limit [LIM_178] (rows=100 width=860) + Number of rows:100 + Select Operator [SEL_177] (rows=39042669 width=860) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_176] + Select Operator [SEL_175] (rows=39042669 width=860) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_303] (rows=39042669 width=860) + Conds:RS_172._col0=RS_173._col0(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 38 [SIMPLE_EDGE] + SHUFFLE [RS_173] + PartitionCols:_col0 + Group By Operator [GBY_170] (rows=26620001 width=860) + Output:["_col0"],keys:KEY._col0 + <-Reducer 37 [SIMPLE_EDGE] + SHUFFLE [RS_169] + PartitionCols:_col0 + Group By Operator [GBY_168] (rows=53240002 width=860) + Output:["_col0"],keys:_col3 + Merge Join Operator [MERGEJOIN_302] (rows=53240002 width=860) + Conds:RS_164._col1=RS_165._col0(Inner),Output:["_col3"] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_164] + PartitionCols:_col1 + Select Operator [SEL_126] (rows=22000000 width=1014) + Output:["_col1"] + Group By Operator [GBY_125] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_124] + PartitionCols:_col0, _col1 + Group By Operator [GBY_123] (rows=44000000 width=1014) + 
Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 + Select Operator [SEL_122] (rows=44000000 width=1014) + Output:["_col7","_col1","_col3"] + Merge Join Operator [MERGEJOIN_293] (rows=44000000 width=1014) + Conds:RS_119._col2=RS_120._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 40 [SIMPLE_EDGE] + SHUFFLE [RS_120] + PartitionCols:_col0 + Select Operator [SEL_115] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_278] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_113] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_119] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_292] (rows=15838314 width=92) + Conds:RS_116._col0=RS_117._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 33 [SIMPLE_EDGE] + SHUFFLE [RS_116] + PartitionCols:_col0 + Select Operator [SEL_109] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_276] (rows=14398467 width=92) + predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null) + TableScan [TS_107] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] + <-Map 39 [SIMPLE_EDGE] + SHUFFLE [RS_117] + PartitionCols:_col0 + Select Operator [SEL_112] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_277] (rows=36524 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null) + TableScan [TS_110] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 44 [SIMPLE_EDGE] + SHUFFLE [RS_165] + PartitionCols:_col0 + Group By Operator [GBY_162] (rows=48400001 width=860) + Output:["_col0"],keys:KEY._col0 + <-Reducer 43 [SIMPLE_EDGE] + SHUFFLE [RS_161] + 
PartitionCols:_col0 + Group By Operator [GBY_160] (rows=96800003 width=860) + Output:["_col0"],keys:_col5 + Merge Join Operator [MERGEJOIN_299] (rows=96800003 width=860) + Conds:RS_156._col0=RS_157._col0(Inner),Output:["_col5"] + <-Reducer 42 [SIMPLE_EDGE] + SHUFFLE [RS_156] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_294] (rows=88000001 width=860) + Conds:RS_153._col1=RS_154._col0(Inner),Output:["_col0"] + <-Map 41 [SIMPLE_EDGE] + SHUFFLE [RS_153] + PartitionCols:_col1 + Select Operator [SEL_129] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_279] (rows=80000000 width=860) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_127] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 45 [SIMPLE_EDGE] + SHUFFLE [RS_154] + PartitionCols:_col0 + Select Operator [SEL_132] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_280] (rows=20000000 width=1014) + predicate:((ca_state = 'IL') and ca_address_sk is not null) + TableScan [TS_130] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 49 [SIMPLE_EDGE] + SHUFFLE [RS_157] + PartitionCols:_col0 + Group By Operator [GBY_151] (rows=22000000 width=1014) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 48 [SIMPLE_EDGE] + SHUFFLE [RS_150] + PartitionCols:_col0, _col1 + Group By Operator [GBY_149] (rows=44000000 width=1014) + Output:["_col0","_col1"],keys:_col1, _col7 + Merge Join Operator [MERGEJOIN_296] (rows=44000000 width=1014) + Conds:RS_145._col2=RS_146._col0(Inner),Output:["_col1","_col7"] + <-Map 51 [SIMPLE_EDGE] + SHUFFLE [RS_146] + PartitionCols:_col0 + Select Operator [SEL_141] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_283] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_139] 
(rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 47 [SIMPLE_EDGE] + SHUFFLE [RS_145] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_295] (rows=15838314 width=92) + Conds:RS_142._col0=RS_143._col0(Inner),Output:["_col1","_col2"] + <-Map 46 [SIMPLE_EDGE] + SHUFFLE [RS_142] + PartitionCols:_col0 + Select Operator [SEL_135] (rows=14398467 width=92) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_281] (rows=14398467 width=92) + predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) + TableScan [TS_133] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk"] + <-Map 50 [SIMPLE_EDGE] + SHUFFLE [RS_143] + PartitionCols:_col0 + Select Operator [SEL_138] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_282] (rows=36524 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null) + TableScan [TS_136] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_172] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=35493335 width=860) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Filter Operator [FIL_105] (rows=35493335 width=860) + predicate:(_col18 > _col19) + Merge Join Operator [MERGEJOIN_301] (rows=106480005 width=860) + Conds:RS_102._col17=RS_103._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col17","_col18","_col19"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_103] + PartitionCols:_col1 + Select Operator [SEL_95] (rows=13310000 width=860) + Output:["_col0","_col1"] + Group By Operator [GBY_94] (rows=13310000 width=860) + 
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Group By Operator [GBY_89] (rows=26620001 width=860) + Output:["_col0"],keys:KEY._col0 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_88] + PartitionCols:_col0 + Group By Operator [GBY_87] (rows=53240002 width=860) + Output:["_col0"],keys:_col3 + Merge Join Operator [MERGEJOIN_300] (rows=53240002 width=860) + Conds:RS_83._col1=RS_84._col0(Inner),Output:["_col3"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col1 + Select Operator [SEL_45] (rows=22000000 width=1014) + Output:["_col1"] + Group By Operator [GBY_44] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0, _col1 + Group By Operator [GBY_42] (rows=44000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 + Select Operator [SEL_41] (rows=44000000 width=1014) + Output:["_col7","_col1","_col3"] + Merge Join Operator [MERGEJOIN_288] (rows=44000000 width=1014) + Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_270] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_32] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_287] (rows=15838314 width=92) + Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_268] (rows=14398467 width=92) + predicate:(wr_returned_date_sk is 
not null and wr_returning_addr_sk is not null) + TableScan [TS_26] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_269] (rows=36524 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null) + TableScan [TS_29] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_84] + PartitionCols:_col0 + Group By Operator [GBY_81] (rows=48400001 width=860) + Output:["_col0"],keys:KEY._col0 + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col0 + Group By Operator [GBY_79] (rows=96800003 width=860) + Output:["_col0"],keys:_col5 + Merge Join Operator [MERGEJOIN_298] (rows=96800003 width=860) + Conds:RS_75._col0=RS_76._col0(Inner),Output:["_col5"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_289] (rows=88000001 width=860) + Conds:RS_72._col1=RS_73._col0(Inner),Output:["_col0"] + <-Map 22 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_271] (rows=80000000 width=860) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_46] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_272] (rows=20000000 width=1014) + predicate:((ca_state = 'IL') and ca_address_sk is not null) + TableScan [TS_49] (rows=40000000 width=1014) + 
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_76] + PartitionCols:_col0 + Group By Operator [GBY_70] (rows=22000000 width=1014) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0, _col1 + Group By Operator [GBY_68] (rows=44000000 width=1014) + Output:["_col0","_col1"],keys:_col1, _col7 + Merge Join Operator [MERGEJOIN_291] (rows=44000000 width=1014) + Conds:RS_64._col2=RS_65._col0(Inner),Output:["_col1","_col7"] + <-Map 32 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_60] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_275] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_58] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_290] (rows=15838314 width=92) + Conds:RS_61._col0=RS_62._col0(Inner),Output:["_col1","_col2"] + <-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0 + Select Operator [SEL_54] (rows=14398467 width=92) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_273] (rows=14398467 width=92) + predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) + TableScan [TS_52] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk"] + <-Map 31 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col0 + Select Operator [SEL_57] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_274] (rows=36524 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null) + TableScan [TS_55] (rows=73049 width=1119) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_102] + PartitionCols:_col17 + Merge Join Operator [MERGEJOIN_297] (rows=96800003 width=860) + Conds:RS_99._col0=RS_100._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col17","_col18"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_100] + PartitionCols:_col0 + Group By Operator [GBY_24] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=44000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col1, _col7 + Select Operator [SEL_21] (rows=44000000 width=1014) + Output:["_col1","_col7","_col3"] + Merge Join Operator [MERGEJOIN_286] (rows=44000000 width=1014) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_267] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_12] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_285] (rows=15838314 width=92) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_266] (rows=36524 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null) + TableScan [TS_9] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + 
<-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_265] (rows=14398467 width=92) + predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) + TableScan [TS_6] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_99] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_284] (rows=88000001 width=860) + Conds:RS_96._col2=RS_97._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_96] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=80000000 width=860) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Filter Operator [FIL_263] (rows=80000000 width=860) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_day","c_birth_month","c_birth_year","c_birth_country","c_login","c_email_address","c_last_review_date"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_97] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_264] (rows=20000000 width=1014) + predicate:((ca_state = 'IL') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + diff --git a/ql/src/test/results/clientpositive/perf/query41.q.out 
b/ql/src/test/results/clientpositive/perf/query41.q.out new file mode 100644 index 0000000..b09f6f9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query41.q.out @@ -0,0 +1,232 @@ +PREHOOK: query: explain select distinct(i_product_name) + from item i1 + where i_manufact_id between 742 and 742+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'orchid' or i_color = 'papaya') and + (i_units = 'Pound' or i_units = 'Lb') and + (i_size = 'petite' or i_size = 'medium') + ) or + (i_category = 'Women' and + (i_color = 'burlywood' or i_color = 'navy') and + (i_units = 'Bundle' or i_units = 'Each') and + (i_size = 'N/A' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'bisque' or i_color = 'azure') and + (i_units = 'N/A' or i_units = 'Tsp') and + (i_size = 'small' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'chocolate' or i_color = 'cornflower') and + (i_units = 'Bunch' or i_units = 'Gross') and + (i_size = 'petite' or i_size = 'medium') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'salmon' or i_color = 'midnight') and + (i_units = 'Oz' or i_units = 'Box') and + (i_size = 'petite' or i_size = 'medium') + ) or + (i_category = 'Women' and + (i_color = 'snow' or i_color = 'steel') and + (i_units = 'Carton' or i_units = 'Tbl') and + (i_size = 'N/A' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'purple' or i_color = 'gainsboro') and + (i_units = 'Dram' or i_units = 'Unknown') and + (i_size = 'small' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'metallic' or i_color = 'forest') and + (i_units = 'Gram' or i_units = 'Ounce') and + (i_size = 'petite' or i_size = 'medium') + )))) > 0 + order by i_product_name + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct(i_product_name) + from item i1 + where i_manufact_id between 742 and 742+40 + 
and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'orchid' or i_color = 'papaya') and + (i_units = 'Pound' or i_units = 'Lb') and + (i_size = 'petite' or i_size = 'medium') + ) or + (i_category = 'Women' and + (i_color = 'burlywood' or i_color = 'navy') and + (i_units = 'Bundle' or i_units = 'Each') and + (i_size = 'N/A' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'bisque' or i_color = 'azure') and + (i_units = 'N/A' or i_units = 'Tsp') and + (i_size = 'small' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'chocolate' or i_color = 'cornflower') and + (i_units = 'Bunch' or i_units = 'Gross') and + (i_size = 'petite' or i_size = 'medium') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'salmon' or i_color = 'midnight') and + (i_units = 'Oz' or i_units = 'Box') and + (i_size = 'petite' or i_size = 'medium') + ) or + (i_category = 'Women' and + (i_color = 'snow' or i_color = 'steel') and + (i_units = 'Carton' or i_units = 'Tbl') and + (i_size = 'N/A' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'purple' or i_color = 'gainsboro') and + (i_units = 'Dram' or i_units = 'Unknown') and + (i_size = 'small' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'metallic' or i_color = 'forest') and + (i_units = 'Gram' or i_units = 'Ounce') and + (i_size = 'petite' or i_size = 'medium') + )))) > 0 + order by i_product_name + limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 10 <- Map 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 + File Output Operator [FS_60] + Limit [LIM_59] (rows=100 width=1436) + Number of rows:100 + Select Operator [SEL_58] (rows=69879 width=1436) + Output:["_col0"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_57] + Group By Operator [GBY_55] (rows=69879 width=1436) + Output:["_col0"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col0 + Group By Operator [GBY_53] (rows=139758 width=1436) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_78] (rows=139758 width=1436) + Conds:RS_49._col0=RS_50._col0(Left Outer),Output:["_col1"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col0 + Group By Operator [GBY_47] (rows=127053 width=1436) + Output:["_col0"],keys:KEY._col0 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Group By Operator [GBY_45] (rows=254107 width=1436) + Output:["_col0"],keys:_col5 + Merge Join Operator [MERGEJOIN_77] (rows=254107 width=1436) + Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col5"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col1 + Select Operator [SEL_33] (rows=231007 width=1436) + Output:["_col1"] + Filter Operator [FIL_73] (rows=231007 width=1436) + predicate:(((i_category = 'Women') and ((i_color = 'orchid') or (i_color = 'papaya')) and ((i_units = 'Pound') or (i_units = 'Lb')) and ((i_size = 'petite') or (i_size = 'medium'))) or ((i_category = 'Women') and ((i_color = 'burlywood') or 
(i_color = 'navy')) and ((i_units = 'Bundle') or (i_units = 'Each')) and ((i_size = 'N/A') or (i_size = 'extra large'))) or ((i_category = 'Men') and ((i_color = 'bisque') or (i_color = 'azure')) and ((i_units = 'N/A') or (i_units = 'Tsp')) and ((i_size = 'small') or (i_size = 'large'))) or ((i_category = 'Men') and ((i_color = 'chocolate') or (i_color = 'cornflower')) and ((i_units = 'Bunch') or (i_units = 'Gross')) and ((i_size = 'petite') or (i_size = 'medium'))) or ((i_category = 'Women') and ((i_color = 'salmon') or (i_color = 'midnight')) and ((i_units = 'Oz') or (i_units = 'Box')) and ((i_size = 'petite') or (i_size = 'medium'))) or ((i_category = 'Women') and ((i_color = 'snow') or (i_color = 'steel')) and ((i_units = 'Carton') or (i_units = 'Tbl')) and ((i_size = 'N/A') or (i_size = 'extra large'))) or ((i_category = 'Men') and ((i_color = 'purple') or (i_color = 'gainsboro')) and ((i_units = 'Dram') or (i_units = 'Unknown')) and ((i_size = 'small') or (i_size = 'large'))) or ((i_category = 'Men') and ((i_color = 'metallic') or (i_color = 'forest')) and ((i_units = 'Gram') or (i_units = 'Ounce')) and ((i_size = 'petite') or (i_size = 'medium')))) + TableScan [TS_31] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact","i_size","i_color","i_units"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Group By Operator [GBY_39] (rows=25666 width=1436) + Output:["_col0"],keys:KEY._col0 + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col0 + Group By Operator [GBY_37] (rows=51333 width=1436) + Output:["_col0"],keys:i_manufact + Select Operator [SEL_36] (rows=51333 width=1436) + Output:["i_manufact"] + Filter Operator [FIL_74] (rows=51333 width=1436) + predicate:i_manufact_id BETWEEN 742 AND 782 + TableScan [TS_34] (rows=462000 width=1436) + default@item,i1,Tbl:COMPLETE,Col:NONE,Output:["i_manufact_id","i_manufact"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col0 + 
Select Operator [SEL_30] (rows=56466 width=1436) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_76] (rows=56466 width=1436) + Conds:RS_27._col1=RS_28._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col1 + Select Operator [SEL_2] (rows=51333 width=1436) + Output:["_col1","_col2"] + Filter Operator [FIL_70] (rows=51333 width=1436) + predicate:i_manufact_id BETWEEN 742 AND 782 + TableScan [TS_0] (rows=462000 width=1436) + default@item,i1,Tbl:COMPLETE,Col:NONE,Output:["i_manufact_id","i_manufact","i_product_name"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=21175 width=1436) + Output:["_col0"] + Filter Operator [FIL_25] (rows=21175 width=1436) + predicate:(sq_count_check(_col1) > 0) + Group By Operator [GBY_24] (rows=63526 width=1436) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Group By Operator [GBY_19] (rows=127053 width=1436) + Output:["_col0"],keys:KEY._col0 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Group By Operator [GBY_17] (rows=254107 width=1436) + Output:["_col0"],keys:_col5 + Merge Join Operator [MERGEJOIN_75] (rows=254107 width=1436) + Conds:RS_13._col1=RS_14._col0(Inner),Output:["_col5"] + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_5] (rows=231007 width=1436) + Output:["_col1"] + Filter Operator [FIL_71] (rows=231007 width=1436) + predicate:(((i_category = 'Women') and ((i_color = 'orchid') or (i_color = 'papaya')) and ((i_units = 'Pound') or (i_units = 'Lb')) and ((i_size = 'petite') or (i_size = 'medium'))) or ((i_category = 'Women') and ((i_color = 'burlywood') or (i_color = 'navy')) and ((i_units = 'Bundle') or (i_units = 'Each')) and ((i_size = 'N/A') or (i_size = 'extra large'))) or ((i_category = 'Men') and ((i_color = 'bisque') or (i_color = 'azure')) and ((i_units = 'N/A') or (i_units = 'Tsp')) and ((i_size = 'small') or (i_size = 'large'))) 
or ((i_category = 'Men') and ((i_color = 'chocolate') or (i_color = 'cornflower')) and ((i_units = 'Bunch') or (i_units = 'Gross')) and ((i_size = 'petite') or (i_size = 'medium'))) or ((i_category = 'Women') and ((i_color = 'salmon') or (i_color = 'midnight')) and ((i_units = 'Oz') or (i_units = 'Box')) and ((i_size = 'petite') or (i_size = 'medium'))) or ((i_category = 'Women') and ((i_color = 'snow') or (i_color = 'steel')) and ((i_units = 'Carton') or (i_units = 'Tbl')) and ((i_size = 'N/A') or (i_size = 'extra large'))) or ((i_category = 'Men') and ((i_color = 'purple') or (i_color = 'gainsboro')) and ((i_units = 'Dram') or (i_units = 'Unknown')) and ((i_size = 'small') or (i_size = 'large'))) or ((i_category = 'Men') and ((i_color = 'metallic') or (i_color = 'forest')) and ((i_units = 'Gram') or (i_units = 'Ounce')) and ((i_size = 'petite') or (i_size = 'medium')))) + TableScan [TS_3] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact","i_size","i_color","i_units"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=25666 width=1436) + Output:["_col0"],keys:KEY._col0 + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col0 + Group By Operator [GBY_9] (rows=51333 width=1436) + Output:["_col0"],keys:i_manufact + Select Operator [SEL_8] (rows=51333 width=1436) + Output:["i_manufact"] + Filter Operator [FIL_72] (rows=51333 width=1436) + predicate:i_manufact_id BETWEEN 742 AND 782 + TableScan [TS_6] (rows=462000 width=1436) + default@item,i1,Tbl:COMPLETE,Col:NONE,Output:["i_manufact_id","i_manufact"] + diff --git a/ql/src/test/results/clientpositive/perf/query6.q.out b/ql/src/test/results/clientpositive/perf/query6.q.out new file mode 100644 index 0000000..b42eb65 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query6.q.out @@ -0,0 +1,534 @@ +Warning: Shuffle Join MERGEJOIN[341][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' 
is a cross product +Warning: Shuffle Join MERGEJOIN[327][tables = [$hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 27' is a cross product +Warning: Shuffle Join MERGEJOIN[334][tables = [$hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 47' is a cross product +PREHOOK: query: explain select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 18 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE) +Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE) +Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 31 <- Map 30 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) +Reducer 32 <- Map 37 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Map 38 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 35 <- Map 34 (SIMPLE_EDGE) +Reducer 36 <- Reducer 35 (SIMPLE_EDGE) +Reducer 4 <- Reducer 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 40 <- Map 39 (SIMPLE_EDGE) +Reducer 42 <- Map 41 (SIMPLE_EDGE), Reducer 48 (SIMPLE_EDGE) +Reducer 43 <- Reducer 42 (SIMPLE_EDGE) +Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 49 (SIMPLE_EDGE) +Reducer 46 <- Reducer 45 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE) +Reducer 47 <- Reducer 46 (SIMPLE_EDGE), Reducer 60 (SIMPLE_EDGE) +Reducer 48 <- Reducer 47 (SIMPLE_EDGE) +Reducer 5 <- Reducer 23 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 51 <- Map 50 (SIMPLE_EDGE), Reducer 56 (SIMPLE_EDGE) +Reducer 52 <- Map 57 (SIMPLE_EDGE), Reducer 51 (SIMPLE_EDGE) +Reducer 53 <- Map 58 (SIMPLE_EDGE), Reducer 52 (SIMPLE_EDGE) +Reducer 55 <- Map 54 (SIMPLE_EDGE) +Reducer 56 <- Reducer 55 (SIMPLE_EDGE) +Reducer 6 <- Reducer 43 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 60 <- Map 59 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch 
Operator + limit:100 + Stage-1 + Reducer 8 + File Output Operator [FS_212] + Limit [LIM_211] (rows=100 width=1208) + Number of rows:100 + Select Operator [SEL_210] (rows=470574622181 width=1208) + Output:["_col0","_col1"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_209] + Filter Operator [FIL_207] (rows=470574622181 width=1208) + predicate:(_col1 >= 10) + Group By Operator [GBY_206] (rows=1411723866545 width=1208) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_205] + PartitionCols:_col0 + Group By Operator [GBY_204] (rows=2823447733090 width=1208) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 + Merge Join Operator [MERGEJOIN_343] (rows=2823447733090 width=1208) + Conds:RS_200._col11=RS_201._col0(Left Outer),Output:["_col1"] + <-Reducer 43 [SIMPLE_EDGE] + SHUFFLE [RS_201] + PartitionCols:_col0 + Group By Operator [GBY_198] (rows=1925077958109 width=1208) + Output:["_col0"],keys:KEY._col0 + <-Reducer 42 [SIMPLE_EDGE] + SHUFFLE [RS_197] + PartitionCols:_col0 + Group By Operator [GBY_196] (rows=3850155916219 width=1208) + Output:["_col0"],keys:_col2 + Merge Join Operator [MERGEJOIN_335] (rows=3850155916219 width=1208) + Conds:RS_192._col1=RS_193._col0(Inner),Output:["_col2"] + <-Map 41 [SIMPLE_EDGE] + SHUFFLE [RS_192] + PartitionCols:_col1 + Select Operator [SEL_133] (rows=462000 width=1436) + Output:["_col1"] + TableScan [TS_132] (rows=462000 width=1436) + default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_category"] + <-Reducer 48 [SIMPLE_EDGE] + SHUFFLE [RS_193] + PartitionCols:_col0 + Group By Operator [GBY_190] (rows=3500141666154 width=1208) + Output:["_col0"],keys:KEY._col0 + <-Reducer 47 [SIMPLE_EDGE] + SHUFFLE [RS_189] + PartitionCols:_col0 + Group By Operator [GBY_188] (rows=7000283332309 width=1208) + Output:["_col0"],keys:_col10 + Merge Join Operator [MERGEJOIN_334] (rows=7000283332309 width=1208) + Conds:(Left Outer),Output:["_col10"] + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE 
[RS_184] + Merge Join Operator [MERGEJOIN_333] (rows=766650239 width=88) + Conds:RS_181._col0=RS_182._col2(Inner),Output:["_col10"] + <-Reducer 45 [SIMPLE_EDGE] + SHUFFLE [RS_181] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_329] (rows=88000001 width=860) + Conds:RS_178._col1=RS_179._col0(Inner),Output:["_col0"] + <-Map 44 [SIMPLE_EDGE] + SHUFFLE [RS_178] + PartitionCols:_col1 + Select Operator [SEL_136] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_314] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_134] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 49 [SIMPLE_EDGE] + SHUFFLE [RS_179] + PartitionCols:_col0 + Select Operator [SEL_139] (rows=40000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_315] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_137] (rows=40000000 width=1014) + default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk"] + <-Reducer 53 [SIMPLE_EDGE] + SHUFFLE [RS_182] + PartitionCols:_col2 + Select Operator [SEL_170] (rows=696954748 width=88) + Output:["_col2","_col7"] + Merge Join Operator [MERGEJOIN_332] (rows=696954748 width=88) + Conds:RS_167._col4=RS_168._col0(Inner),Output:["_col5","_col7"] + <-Map 58 [SIMPLE_EDGE] + SHUFFLE [RS_168] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=462000 width=1436) + Output:["_col0","_col1"] + Filter Operator [FIL_320] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_158] (rows=462000 width=1436) + default@item,i,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_category"] + <-Reducer 52 [SIMPLE_EDGE] + SHUFFLE [RS_167] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_331] (rows=633595212 width=88) + Conds:RS_164._col0=RS_165._col0(Inner),Output:["_col4","_col5"] + <-Map 57 [SIMPLE_EDGE] + SHUFFLE [RS_165] + PartitionCols:_col0 + Select 
Operator [SEL_157] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_319] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) + TableScan [TS_155] (rows=575995635 width=88) + default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + <-Reducer 51 [SIMPLE_EDGE] + SHUFFLE [RS_164] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_330] (rows=80353 width=1119) + Conds:RS_161.UDFToLong(_col1)=RS_162._col0(Inner),Output:["_col0"] + <-Map 50 [SIMPLE_EDGE] + SHUFFLE [RS_161] + PartitionCols:UDFToLong(_col1) + Select Operator [SEL_142] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_316] (rows=73049 width=1119) + predicate:(d_date_sk is not null and d_month_seq is not null) + TableScan [TS_140] (rows=73049 width=1119) + default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Reducer 56 [SIMPLE_EDGE] + SHUFFLE [RS_162] + PartitionCols:_col0 + Select Operator [SEL_154] (rows=1 width=8) + Output:["_col0"] + Group By Operator [GBY_153] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 55 [SIMPLE_EDGE] + SHUFFLE [RS_152] + Group By Operator [GBY_151] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_149] (rows=9131 width=1119) + Group By Operator [GBY_148] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 54 [SIMPLE_EDGE] + SHUFFLE [RS_147] + PartitionCols:_col0 + Group By Operator [GBY_146] (rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_145] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_318] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2)) + TableScan [TS_143] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 60 [SIMPLE_EDGE] + SHUFFLE 
[RS_185] + Select Operator [SEL_177] (rows=9131 width=1119) + Group By Operator [GBY_176] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 59 [SIMPLE_EDGE] + SHUFFLE [RS_175] + PartitionCols:_col0 + Group By Operator [GBY_174] (rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_173] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_321] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2)) + TableScan [TS_171] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_200] + PartitionCols:_col11 + Select Operator [SEL_131] (rows=2566770610813 width=1208) + Output:["_col1","_col11"] + Filter Operator [FIL_130] (rows=2566770610813 width=1208) + predicate:(_col10 > (1.2 * _col14)) + Merge Join Operator [MERGEJOIN_342] (rows=7700311832439 width=1208) + Conds:RS_127._col11=RS_128._col1(Left Outer),Output:["_col1","_col10","_col11","_col14"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_128] + PartitionCols:_col1 + Select Operator [SEL_126] (rows=962538979054 width=1208) + Output:["_col0","_col1"] + Group By Operator [GBY_125] (rows=962538979054 width=1208) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Group By Operator [GBY_120] (rows=1925077958109 width=1208) + Output:["_col0"],keys:KEY._col0 + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_119] + PartitionCols:_col0 + Group By Operator [GBY_118] (rows=3850155916219 width=1208) + Output:["_col0"],keys:_col2 + Merge Join Operator [MERGEJOIN_328] (rows=3850155916219 width=1208) + Conds:RS_114._col1=RS_115._col0(Inner),Output:["_col2"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_114] + PartitionCols:_col1 + Select Operator [SEL_55] (rows=462000 width=1436) + Output:["_col1"] + TableScan [TS_54] (rows=462000 width=1436) + default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_category"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_115] + 
PartitionCols:_col0 + Group By Operator [GBY_112] (rows=3500141666154 width=1208) + Output:["_col0"],keys:KEY._col0 + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_111] + PartitionCols:_col0 + Group By Operator [GBY_110] (rows=7000283332309 width=1208) + Output:["_col0"],keys:_col10 + Merge Join Operator [MERGEJOIN_327] (rows=7000283332309 width=1208) + Conds:(Left Outer),Output:["_col10"] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_106] + Merge Join Operator [MERGEJOIN_326] (rows=766650239 width=88) + Conds:RS_103._col0=RS_104._col2(Inner),Output:["_col10"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_103] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_322] (rows=88000001 width=860) + Conds:RS_100._col1=RS_101._col0(Inner),Output:["_col0"] + <-Map 24 [SIMPLE_EDGE] + SHUFFLE [RS_100] + PartitionCols:_col1 + Select Operator [SEL_58] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_305] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_56] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 29 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_61] (rows=40000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_306] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_59] (rows=40000000 width=1014) + default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk"] + <-Reducer 33 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col2 + Select Operator [SEL_92] (rows=696954748 width=88) + Output:["_col2","_col7"] + Merge Join Operator [MERGEJOIN_325] (rows=696954748 width=88) + Conds:RS_89._col4=RS_90._col0(Inner),Output:["_col5","_col7"] + <-Map 38 [SIMPLE_EDGE] + SHUFFLE [RS_90] + PartitionCols:_col0 + Select Operator [SEL_82] (rows=462000 width=1436) + Output:["_col0","_col1"] + Filter Operator [FIL_311] (rows=462000 width=1436) + 
predicate:i_item_sk is not null + TableScan [TS_80] (rows=462000 width=1436) + default@item,i,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_category"] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_89] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_324] (rows=633595212 width=88) + Conds:RS_86._col0=RS_87._col0(Inner),Output:["_col4","_col5"] + <-Map 37 [SIMPLE_EDGE] + SHUFFLE [RS_87] + PartitionCols:_col0 + Select Operator [SEL_79] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_310] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) + TableScan [TS_77] (rows=575995635 width=88) + default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_86] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_323] (rows=80353 width=1119) + Conds:RS_83.UDFToLong(_col1)=RS_84._col0(Inner),Output:["_col0"] + <-Map 30 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:UDFToLong(_col1) + Select Operator [SEL_64] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_307] (rows=73049 width=1119) + predicate:(d_date_sk is not null and d_month_seq is not null) + TableScan [TS_62] (rows=73049 width=1119) + default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_84] + PartitionCols:_col0 + Select Operator [SEL_76] (rows=1 width=8) + Output:["_col0"] + Group By Operator [GBY_75] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_74] + Group By Operator [GBY_73] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_71] (rows=9131 width=1119) + Group By Operator [GBY_70] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 34 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Group By Operator [GBY_68] 
(rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_67] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_309] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2)) + TableScan [TS_65] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_107] + Select Operator [SEL_99] (rows=9131 width=1119) + Group By Operator [GBY_98] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 39 [SIMPLE_EDGE] + SHUFFLE [RS_97] + PartitionCols:_col0 + Group By Operator [GBY_96] (rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_95] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_312] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2)) + TableScan [TS_93] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_127] + PartitionCols:_col11 + Select Operator [SEL_53] (rows=7000283332309 width=1208) + Output:["_col1","_col10","_col11"] + Merge Join Operator [MERGEJOIN_341] (rows=7000283332309 width=1208) + Conds:(Left Outer),Output:["_col3","_col11","_col12"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_51] + Select Operator [SEL_43] (rows=9131 width=1119) + Group By Operator [GBY_42] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Group By Operator [GBY_40] (rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_39] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_303] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2)) + TableScan [TS_37] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_50] + 
Merge Join Operator [MERGEJOIN_340] (rows=766650239 width=88) + Conds:RS_47._col0=RS_48._col2(Inner),Output:["_col3","_col11","_col12"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col2 + Select Operator [SEL_36] (rows=696954748 width=88) + Output:["_col2","_col7","_col8"] + Merge Join Operator [MERGEJOIN_339] (rows=696954748 width=88) + Conds:RS_33._col4=RS_34._col0(Inner),Output:["_col5","_col7","_col8"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_302] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_24] (rows=462000 width=1436) + default@item,i,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_category"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_338] (rows=633595212 width=88) + Conds:RS_30._col0=RS_31._col0(Inner),Output:["_col4","_col5"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_301] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) + TableScan [TS_21] (rows=575995635 width=88) + default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_337] (rows=80353 width=1119) + Conds:RS_27.UDFToLong(_col1)=RS_28._col0(Inner),Output:["_col0"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:UDFToLong(_col1) + Select Operator [SEL_8] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_298] (rows=73049 width=1119) + predicate:(d_date_sk is not null and d_month_seq is not null) + TableScan [TS_6] (rows=73049 width=1119) + 
default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=1 width=8) + Output:["_col0"] + Group By Operator [GBY_19] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_15] (rows=9131 width=1119) + Group By Operator [GBY_14] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Group By Operator [GBY_12] (rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_11] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_300] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2)) + TableScan [TS_9] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_47] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_336] (rows=88000001 width=860) + Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col0","_col3"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col1 + Select Operator [SEL_2] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_296] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_297] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_3] (rows=40000000 width=1014) + 
default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + diff --git a/ql/src/test/results/clientpositive/perf/query81.q.out b/ql/src/test/results/clientpositive/perf/query81.q.out new file mode 100644 index 0000000..3229a8e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query81.q.out @@ -0,0 +1,316 @@ +PREHOOK: query: explain with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select 
c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 15 <- Map 20 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 27 <- Map 26 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 28 <- Map 31 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 + File Output Operator [FS_105] + Limit [LIM_104] (rows=100 width=860) + Number of rows:100 + Select Operator [SEL_103] 
(rows=35493335 width=860) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_102] + Select Operator [SEL_101] (rows=35493335 width=860) + Output:["_col0","_col1","_col11","_col12","_col13","_col14","_col15","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Filter Operator [FIL_100] (rows=35493335 width=860) + predicate:(_col20 > _col21) + Merge Join Operator [MERGEJOIN_180] (rows=106480005 width=860) + Conds:RS_97._col19=RS_98._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col20","_col21"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_98] + PartitionCols:_col1 + Select Operator [SEL_90] (rows=26620001 width=860) + Output:["_col0","_col1"] + Group By Operator [GBY_89] (rows=26620001 width=860) + Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_88] + PartitionCols:_col0 + Group By Operator [GBY_87] (rows=53240002 width=860) + Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col3 + Select Operator [SEL_86] (rows=53240002 width=860) + Output:["_col3","_col2"] + Merge Join Operator [MERGEJOIN_179] (rows=53240002 width=860) + Conds:RS_83._col1=RS_84._col0(Inner),Output:["_col2","_col3"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col1 + Select Operator [SEL_45] (rows=22000000 width=1014) + Output:["_col1","_col2"] + Group By Operator [GBY_44] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0, _col1 + Group By Operator [GBY_42] (rows=44000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 + Select Operator [SEL_41] (rows=44000000 width=1014) + 
Output:["_col7","_col1","_col3"] + Merge Join Operator [MERGEJOIN_173] (rows=44000000 width=1014) + Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_163] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_32] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_172] (rows=31678769 width=106) + Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_161] (rows=28798881 width=106) + predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null) + TableScan [TS_26] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_162] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_29] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_84] + PartitionCols:_col0 + Group By Operator [GBY_81] (rows=48400001 width=860) + Output:["_col0"],keys:KEY._col0 + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col0 + Group By Operator [GBY_79] (rows=96800003 width=860) + Output:["_col0"],keys:_col5 + Merge Join Operator [MERGEJOIN_178] 
(rows=96800003 width=860) + Conds:RS_75._col0=RS_76._col0(Inner),Output:["_col5"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_174] (rows=88000001 width=860) + Conds:RS_72._col1=RS_73._col0(Inner),Output:["_col0"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_164] (rows=80000000 width=860) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_46] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 25 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_165] (rows=20000000 width=1014) + predicate:((ca_state = 'IL') and ca_address_sk is not null) + TableScan [TS_49] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_76] + PartitionCols:_col0 + Group By Operator [GBY_70] (rows=22000000 width=1014) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0, _col1 + Group By Operator [GBY_68] (rows=44000000 width=1014) + Output:["_col0","_col1"],keys:_col1, _col7 + Merge Join Operator [MERGEJOIN_176] (rows=44000000 width=1014) + Conds:RS_64._col2=RS_65._col0(Inner),Output:["_col1","_col7"] + <-Map 31 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_60] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_168] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_58] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE 
[RS_64] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_175] (rows=31678769 width=106) + Conds:RS_61._col0=RS_62._col0(Inner),Output:["_col1","_col2"] + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0 + Select Operator [SEL_54] (rows=28798881 width=106) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_166] (rows=28798881 width=106) + predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) + TableScan [TS_52] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk"] + <-Map 30 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col0 + Select Operator [SEL_57] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_167] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_55] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_97] + PartitionCols:_col19 + Merge Join Operator [MERGEJOIN_177] (rows=96800003 width=860) + Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col19","_col20"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_95] + PartitionCols:_col0 + Group By Operator [GBY_24] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=44000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col1, _col7 + Select Operator [SEL_21] (rows=44000000 width=1014) + Output:["_col1","_col7","_col3"] + Merge Join Operator [MERGEJOIN_171] (rows=44000000 width=1014) + 
Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_160] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_12] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_170] (rows=31678769 width=106) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_159] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_9] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_158] (rows=28798881 width=106) + predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) + TableScan [TS_6] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_169] (rows=88000001 width=860) + Conds:RS_91._col2=RS_92._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_91] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=80000000 
width=860) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_156] (rows=80000000 width=860) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_92] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=20000000 width=1014) + Output:["_col0","_col1","_col10","_col11","_col2","_col3","_col4","_col5","_col6","_col8","_col9"] + Filter Operator [FIL_157] (rows=20000000 width=1014) + predicate:((ca_state = 'IL') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] +