diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index e03e96ff12..31278310db 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -32,6 +32,7 @@ import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Aggregate.Group; import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Exchange; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; @@ -62,6 +63,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; @@ -88,7 +90,7 @@ private Aggregate groupBy; private Filter having; private RelNode select; - private Sort orderLimit; + private RelNode orderLimit; private Schema schema; @@ -255,7 +257,7 @@ private ASTNode convert() throws CalciteSemanticException { * its parent. * 8. Limit */ - convertOrderLimitToASTNode((HiveSortLimit) orderLimit); + convertOrderToASTNode(orderLimit); return hiveAST.getAST(); } @@ -275,75 +277,107 @@ private ASTNode buildUDTFAST(String functionName, List children) { return node; } - private void convertOrderLimitToASTNode(HiveSortLimit order) { - if (order != null) { - HiveSortLimit hiveSortLimit = order; - if (!hiveSortLimit.getCollation().getFieldCollations().isEmpty()) { - // 1 Add order by token - ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); - - schema = new Schema(hiveSortLimit); - Map obRefToCallMap = hiveSortLimit.getInputRefToCallMap(); - RexNode obExpr; - ASTNode astCol; - for (RelFieldCollation c : hiveSortLimit.getCollation().getFieldCollations()) { - - // 2 Add Direction token - ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? 
ASTBuilder - .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder - .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); - ASTNode nullDirectionAST; - // Null direction - if (c.nullDirection == RelFieldCollation.NullDirection.FIRST) { - nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); - directionAST.addChild(nullDirectionAST); - } else if (c.nullDirection == RelFieldCollation.NullDirection.LAST) { - nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); - directionAST.addChild(nullDirectionAST); - } else { - // Default - if (c.getDirection() == RelFieldCollation.Direction.ASCENDING) { - nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); - directionAST.addChild(nullDirectionAST); - } else { - nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); - directionAST.addChild(nullDirectionAST); - } - } + private void convertOrderToASTNode(RelNode node) { + if (node == null) { + return; + } - // 3 Convert OB expr (OB Expr is usually an input ref except for top - // level OB; top level OB will have RexCall kept in a map.) - obExpr = null; - if (obRefToCallMap != null) { - obExpr = obRefToCallMap.get(c.getFieldIndex()); - } + if (node instanceof HiveSortLimit) { + convertOrderLimitToASTNode((HiveSortLimit) node); + } else if (node instanceof HiveSortExchange) { + convertSortToASTNode((HiveSortExchange) node); + } + } - if (obExpr != null) { - astCol = obExpr.accept(new RexVisitor(schema, false, order.getCluster().getRexBuilder())); - } else { - ColumnInfo cI = schema.get(c.getFieldIndex()); - /* - * The RowResolver setup for Select drops Table associations. So - * setup ASTNode on unqualified name. - */ - astCol = ASTBuilder.unqualifiedName(cI.column); - } + private void convertOrderLimitToASTNode(HiveSortLimit hiveSortLimit) { + List fieldCollations = hiveSortLimit.getCollation().getFieldCollations(); + convertFieldCollationsToASTNode(hiveSortLimit, new Schema(hiveSortLimit), fieldCollations, + hiveSortLimit.getInputRefToCallMap(), HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); - // 4 buildup the ob expr AST - nullDirectionAST.addChild(astCol); - orderAst.addChild(directionAST); + RexNode offsetExpr = hiveSortLimit.getOffsetExpr(); + RexNode fetchExpr = hiveSortLimit.getFetchExpr(); + if (fetchExpr != null) { + Object offset = (offsetExpr == null) ? Integer.valueOf(0) : ((RexLiteral) offsetExpr).getValue2(); + Object fetch = ((RexLiteral) fetchExpr).getValue2(); + hiveAST.limit = ASTBuilder.limit(offset, fetch); + } + } + + private void convertSortToASTNode(HiveSortExchange hiveSortExchange) { + List fieldCollations = hiveSortExchange.getCollation().getFieldCollations(); + convertFieldCollationsToASTNode(hiveSortExchange, new Schema(hiveSortExchange), fieldCollations, + null, HiveParser.TOK_SORTBY, "TOK_SORTBY"); + + // TODO: sort by limit +// RexNode offsetExpr = hiveSortExchange.getOffsetExpr(); +// RexNode fetchExpr = hiveSortExchange.getFetchExpr(); +// if (fetchExpr != null) { +// Object offset = (offsetExpr == null) ? 
Integer.valueOf(0) : ((RexLiteral) offsetExpr).getValue2(); +// Object fetch = ((RexLiteral) fetchExpr).getValue2(); +// hiveAST.limit = ASTBuilder.limit(offset, fetch); +// } + } + + private void convertFieldCollationsToASTNode( + RelNode node, Schema schema, List fieldCollations, Map obRefToCallMap, + int astToken, String astText) { + if (fieldCollations.isEmpty()) { + return; + } + + // 1 Add order/sort by token + ASTNode orderAst = ASTBuilder.createAST(astToken, astText); + + RexNode obExpr; + ASTNode astCol; + for (RelFieldCollation c : fieldCollations) { + + // 2 Add Direction token + ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder + .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder + .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); + ASTNode nullDirectionAST; + // Null direction + if (c.nullDirection == RelFieldCollation.NullDirection.FIRST) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); + directionAST.addChild(nullDirectionAST); + } else if (c.nullDirection == RelFieldCollation.NullDirection.LAST) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); + directionAST.addChild(nullDirectionAST); + } else { + // Default + if (c.getDirection() == RelFieldCollation.Direction.ASCENDING) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); + directionAST.addChild(nullDirectionAST); + } else { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); + directionAST.addChild(nullDirectionAST); } - hiveAST.order = orderAst; } - RexNode offsetExpr = hiveSortLimit.getOffsetExpr(); - RexNode fetchExpr = hiveSortLimit.getFetchExpr(); - if (fetchExpr != null) { - Object offset = (offsetExpr == null) ? Integer.valueOf(0) : ((RexLiteral) offsetExpr).getValue2(); - Object fetch = ((RexLiteral) fetchExpr).getValue2(); - hiveAST.limit = ASTBuilder.limit(offset, fetch); + // 3 Convert OB expr (OB Expr is usually an input ref except for top + // level OB; top level OB will have RexCall kept in a map.) + obExpr = null; + if (obRefToCallMap != null) { + obExpr = obRefToCallMap.get(c.getFieldIndex()); } + + if (obExpr != null) { + astCol = obExpr.accept(new RexVisitor(schema, false, node.getCluster().getRexBuilder())); + } else { + ColumnInfo cI = schema.get(c.getFieldIndex()); + /* + * The RowResolver setup for Select drops Table associations. So + * setup ASTNode on unqualified name. 
+ */ + astCol = ASTBuilder.unqualifiedName(cI.column); + } + + // 4 buildup the ob expr AST + nullDirectionAST.addChild(astCol); + orderAst.addChild(directionAST); } + hiveAST.order = orderAst; } private Schema getRowSchema(String tblAlias) { @@ -463,11 +497,11 @@ public void visit(RelNode node, int ordinal, RelNode parent) { ASTConverter.this.from = node; } else if (node instanceof Aggregate) { ASTConverter.this.groupBy = (Aggregate) node; - } else if (node instanceof Sort) { + } else if (node instanceof Sort || node instanceof Exchange) { if (ASTConverter.this.select != null) { ASTConverter.this.from = node; } else { - ASTConverter.this.orderLimit = (Sort) node; + ASTConverter.this.orderLimit = node; } } /* @@ -872,11 +906,12 @@ public QueryBlockInfo(Schema schema, ASTNode ast) { * Hive Sort Node * @return Schema */ - public Schema(HiveSortLimit order) { - Project select = (Project) order.getInput(); - for (String projName : select.getRowType().getFieldNames()) { - add(new ColumnInfo(null, projName)); - } + Schema(HiveSortLimit order) { + this((Project) order.getInput(), null); + } + + Schema(HiveSortExchange sort) { + this((Project) sort.getInput(), null); } public Schema(String tabAlias, List fieldList) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java index 31619c0314..06e311bdd1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java @@ -28,6 +28,7 @@ import org.apache.calcite.rel.SingleRel; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Exchange; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.Project; @@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; @@ -70,7 +72,7 @@ public static RelNode convertOpTree(RelNode rel, List resultSchema, LOG.debug("Original plan for PlanModifier\n " + RelOptUtil.toString(newTopNode)); } - if (!(newTopNode instanceof Project) && !(newTopNode instanceof Sort)) { + if (!(newTopNode instanceof Project) && !(newTopNode instanceof Sort) && !(newTopNode instanceof Exchange)) { newTopNode = introduceDerivedTable(newTopNode); if (LOG.isDebugEnabled()) { LOG.debug("Plan after top-level introduceDerivedTable\n " @@ -174,6 +176,13 @@ private static void convertOpTree(RelNode rel, RelNode parent) { if (!validSortChild((HiveSortLimit) rel)) { introduceDerivedTable(((HiveSortLimit) rel).getInput(), rel); } + } else if (rel instanceof HiveSortExchange) { + if (!validExchangeParent(rel, parent)) { + introduceDerivedTable(rel, parent); + } + if (!validExchangeChild((HiveSortExchange) rel)) { + introduceDerivedTable(((HiveSortExchange) rel).getInput(), rel); + } } else if (rel instanceof HiveAggregate) { RelNode 
newParent = parent; if (!validGBParent(rel, parent)) { @@ -357,6 +366,28 @@ private static boolean validSortChild(HiveSortLimit sortNode) { return validChild; } + private static boolean validExchangeParent(RelNode sortNode, RelNode parent) { + boolean validParent = true; + + if (parent != null && !(parent instanceof Project) && + !(HiveCalciteUtil.pureLimitRelNode(parent) && HiveCalciteUtil.pureOrderRelNode(sortNode))) { + validParent = false; + } + + return validParent; + } + + private static boolean validExchangeChild(HiveSortExchange sortNode) { + boolean validChild = true; + RelNode child = sortNode.getInput(); + + if (!(child instanceof Project) && + !(HiveCalciteUtil.pureLimitRelNode(sortNode) && HiveCalciteUtil.pureOrderRelNode(child))) { + validChild = false; + } + + return validChild; + } private static boolean validSetopParent(RelNode setop, RelNode parent) { boolean validChild = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 537355f7ed..753c6b4339 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -60,6 +60,7 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationImpl; import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelDistribution; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelVisitor; @@ -151,6 +152,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; @@ -227,7 +229,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; -import org.apache.hadoop.hive.ql.parse.type.JoinCondTypeCheckProcFactory; import org.apache.hadoop.hive.ql.parse.type.JoinTypeCheckCtx; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.PlanModifierForReturnPath; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; @@ -900,7 +901,7 @@ private static String canHandleQbForCbo(QueryProperties queryProperties, HiveCon boolean topLevelQB, boolean verbose) { if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy() - && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript() + && !queryProperties.hasPTF() && !queryProperties.usesScript() && queryProperties.isCBOSupportedLateralViews()) { // Ok to run CBO. 
return null; @@ -915,9 +916,6 @@ private static String canHandleQbForCbo(QueryProperties queryProperties, HiveCon if (queryProperties.hasDistributeBy()) { msg += "has distribute by; "; } - if (queryProperties.hasSortBy()) { - msg += "has sort by; "; - } if (queryProperties.hasPTF()) { msg += "has PTF; "; } @@ -3900,217 +3898,263 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException * @throws SemanticException */ private RelNode genOBLogicalPlan(QB qb, Pair selPair, - boolean outermostOB) throws SemanticException { - // selPair.getKey() is the operator right before OB - // selPair.getValue() is RR which only contains columns needed in result - // set. Extra columns needed by order by will be absent from it. - RelNode srcRel = selPair.getKey(); - RowResolver selectOutputRR = selPair.getValue(); - RelNode sortRel = null; - RelNode returnRel = null; + boolean outermostOB) throws SemanticException { QBParseInfo qbp = getQBParseInfo(qb); String dest = qbp.getClauseNames().iterator().next(); ASTNode obAST = qbp.getOrderByForClause(dest); - if (obAST != null) { - // 1. OB Expr sanity test - // in strict mode, in the presence of order by, limit must be - // specified - Integer limit = qb.getParseInfo().getDestLimit(dest); - if (limit == null) { - String error = StrictChecks.checkNoLimit(conf); - if (error != null) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error)); - } + if (obAST == null) { + return null; + } + + // 1. OB Expr sanity test + // in strict mode, in the presence of order by, limit must be + // specified + Integer limit = qb.getParseInfo().getDestLimit(dest); + if (limit == null) { + String error = StrictChecks.checkNoLimit(conf); + if (error != null) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error)); } + } - // 2. 
Walk through OB exprs and extract field collations and additional - // virtual columns needed - final List newVCLst = new ArrayList(); - final List fieldCollations = Lists.newArrayList(); - int fieldIndex = 0; + OBLogicalPlanGenState obLogicalPlanGenState = beginGenOBLogicalPlan(obAST, selPair, outermostOB); - List obASTExprLst = obAST.getChildren(); - ASTNode obASTExpr; - ASTNode nullObASTExpr; - List> vcASTTypePairs = new ArrayList>(); - RowResolver inputRR = relToHiveRR.get(srcRel); - RowResolver outputRR = new RowResolver(); - - RexNode rnd; - RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), - relToHiveColNameCalcitePosMap.get(srcRel), 0, false); - int srcRelRecordSz = srcRel.getRowType().getFieldCount(); - - for (int i = 0; i < obASTExprLst.size(); i++) { - // 2.1 Convert AST Expr to ExprNode - obASTExpr = (ASTNode) obASTExprLst.get(i); - nullObASTExpr = (ASTNode) obASTExpr.getChild(0); - ASTNode ref = (ASTNode) nullObASTExpr.getChild(0); - Map astToExprNDescMap = null; - ExprNodeDesc obExprNDesc = null; - - boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS); - boolean isObyByPos = isBothByPos - || HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS); - // replace each of the position alias in ORDERBY with the actual column - if (ref != null && ref.getToken().getType() == HiveParser.Number) { - if (isObyByPos) { - int pos = Integer.parseInt(ref.getText()); - if (pos > 0 && pos <= selectOutputRR.getColumnInfos().size()) { - // fieldIndex becomes so simple - // Note that pos starts from 1 while fieldIndex starts from 0; - fieldIndex = pos - 1; - } else { - throw new SemanticException( - ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg("Position alias: " + pos - + " does not exist\n" + "The Select List is indexed from 1 to " - + selectOutputRR.getColumnInfos().size())); - } - } else { // if not using position alias and it is a number. - LOG.warn("Using constant number " - + ref.getText() - + " in order by. If you try to use position alias when hive.orderby.position.alias is false, the position alias will be ignored."); - } - } else { - // first try to get it from select - // in case of udtf, selectOutputRR may be null. - if (selectOutputRR != null) { - try { - astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR); - obExprNDesc = astToExprNDescMap.get(ref); - } catch (SemanticException ex) { - // we can tolerate this as this is the previous behavior - LOG.debug("Can not find column in " + ref.getText() + ". The error msg is " - + ex.getMessage()); - } + // 4. Construct SortRel + RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); + RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(obLogicalPlanGenState.getCanonizedCollation())); + RelNode sortRel = new HiveSortLimit( + cluster, traitSet, obLogicalPlanGenState.getObInputRel(), canonizedCollation, null, null); + + return endGenOBLogicalPlan(obLogicalPlanGenState, sortRel); + } + + private RelNode genSBLogicalPlan(QB qb, Pair selPair, + boolean outermostOB) throws SemanticException { + + QBParseInfo qbp = getQBParseInfo(qb); + String dest = qbp.getClauseNames().iterator().next(); + ASTNode sbAST = qbp.getSortByForClause(dest); + + if (sbAST == null) { + return null; + } + + OBLogicalPlanGenState obLogicalPlanGenState = beginGenOBLogicalPlan(sbAST, selPair, outermostOB); + + // 4. 
Construct SortRel + RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); + RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(obLogicalPlanGenState.getCanonizedCollation())); + List<Integer> joinKeyPositions = new ArrayList<>(canonizedCollation.getFieldCollations().size()); + ImmutableList.Builder<RexNode> builder = ImmutableList.builder(); + for (RelFieldCollation relFieldCollation : canonizedCollation.getFieldCollations()) { + int index = relFieldCollation.getFieldIndex(); + joinKeyPositions.add(index); + builder.add(cluster.getRexBuilder().makeInputRef(obLogicalPlanGenState.getObInputRel(), index)); + } + + RelNode sortRel = HiveSortExchange.create( + obLogicalPlanGenState.getObInputRel(), + new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, joinKeyPositions), + canonizedCollation, + builder.build()); + + return endGenOBLogicalPlan(obLogicalPlanGenState, sortRel); + } + + private OBLogicalPlanGenState beginGenOBLogicalPlan(ASTNode obAST, Pair<RelNode, RowResolver> selPair, + boolean outermostOB) throws SemanticException { + // selPair.getKey() is the operator right before OB + // selPair.getValue() is RR which only contains columns needed in result + // set. Extra columns needed by order by will be absent from it. + RelNode srcRel = selPair.getKey(); + RowResolver selectOutputRR = selPair.getValue(); + + // 2. Walk through OB exprs and extract field collations and additional + // virtual columns needed + final List<RexNode> newVCLst = new ArrayList<RexNode>(); + final List<RelFieldCollation> fieldCollations = Lists.newArrayList(); + int fieldIndex = 0; + + List<Node> obASTExprLst = obAST.getChildren(); + ASTNode obASTExpr; + ASTNode nullObASTExpr; + List<Pair<ASTNode, TypeInfo>> vcASTTypePairs = new ArrayList<Pair<ASTNode, TypeInfo>>(); + RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver outputRR = new RowResolver(); + + RexNode rnd; + RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), + relToHiveColNameCalcitePosMap.get(srcRel), 0, false); + int srcRelRecordSz = srcRel.getRowType().getFieldCount(); + + for (int i = 0; i < obASTExprLst.size(); i++) { + // 2.1 Convert AST Expr to ExprNode + obASTExpr = (ASTNode) obASTExprLst.get(i); + nullObASTExpr = (ASTNode) obASTExpr.getChild(0); + ASTNode ref = (ASTNode) nullObASTExpr.getChild(0); + Map<ASTNode, ExprNodeDesc> astToExprNDescMap = null; + ExprNodeDesc obExprNDesc = null; + + boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS); + boolean isObyByPos = isBothByPos + || HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS); + // replace each of the position alias in ORDERBY with the actual column + if (ref != null && ref.getToken().getType() == HiveParser.Number) { + if (isObyByPos) { + int pos = Integer.parseInt(ref.getText()); + if (pos > 0 && pos <= selectOutputRR.getColumnInfos().size()) { + // fieldIndex becomes so simple + // Note that pos starts from 1 while fieldIndex starts from 0; + fieldIndex = pos - 1; + } else { + throw new SemanticException( + ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg("Position alias: " + pos + + " does not exist\n" + "The Select List is indexed from 1 to " + + selectOutputRR.getColumnInfos().size())); } + } else { // if not using position alias and it is a number. + LOG.warn("Using constant number " + + ref.getText() + + " in order by. 
If you try to use position alias when hive.orderby.position.alias is false, the position alias will be ignored."); + } + } else { + // first try to get it from select + // in case of udtf, selectOutputRR may be null. + if (selectOutputRR != null) { + try { + astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR); obExprNDesc = astToExprNDescMap.get(ref); - } - if (obExprNDesc == null) { - throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); - } - // 2.2 Convert ExprNode to RexNode - rnd = converter.convert(obExprNDesc); - - // 2.3 Determine the index of ob expr in child schema - // NOTE: Calcite can not take compound exprs in OB without it being - // present in the child (& hence we add a child Project Rel) - if (rnd instanceof RexInputRef) { - fieldIndex = ((RexInputRef) rnd).getIndex(); - } else { - fieldIndex = srcRelRecordSz + newVCLst.size(); - newVCLst.add(rnd); - vcASTTypePairs.add(new Pair(ref, obExprNDesc.getTypeInfo())); + } catch (SemanticException ex) { + // we can tolerate this as this is the previous behavior + LOG.debug("Can not find column in " + ref.getText() + ". The error msg is " + + ex.getMessage()); } } - - // 2.4 Determine the Direction of order by - RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; - if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { - order = RelFieldCollation.Direction.ASCENDING; + // then try to get it from all + if (obExprNDesc == null) { + astToExprNDescMap = genAllExprNodeDesc(ref, inputRR); + obExprNDesc = astToExprNDescMap.get(ref); } - RelFieldCollation.NullDirection nullOrder; - if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_FIRST) { - nullOrder = RelFieldCollation.NullDirection.FIRST; - } else if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_LAST) { - nullOrder = RelFieldCollation.NullDirection.LAST; + if (obExprNDesc == null) { + throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); + } + // 2.2 Convert ExprNode to RexNode + rnd = converter.convert(obExprNDesc); + + // 2.3 Determine the index of ob expr in child schema + // NOTE: Calcite can not take compound exprs in OB without it being + // present in the child (& hence we add a child Project Rel) + if (rnd instanceof RexInputRef) { + fieldIndex = ((RexInputRef) rnd).getIndex(); } else { - throw new SemanticException("Unexpected null ordering option: " - + nullObASTExpr.getType()); + fieldIndex = srcRelRecordSz + newVCLst.size(); + newVCLst.add(rnd); + vcASTTypePairs.add(new Pair(ref, obExprNDesc.getTypeInfo())); } - - // 2.5 Add to field collations - fieldCollations.add(new RelFieldCollation(fieldIndex, order, nullOrder)); } - // 3. 
Add Child Project Rel if needed, Generate Output RR, input Sel Rel - // for top constraining Sel - RelNode obInputRel = srcRel; - if (!newVCLst.isEmpty()) { - List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), - new Function() { - @Override - public RexNode apply(RelDataTypeField input) { - return new RexInputRef(input.getIndex(), input.getType()); - } - }); - RowResolver obSyntheticProjectRR = new RowResolver(); - if (!RowResolver.add(obSyntheticProjectRR, inputRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message", - UnsupportedFeature.Duplicates_in_RR); - } - int vcolPos = inputRR.getRowSchema().getSignature().size(); - for (Pair astTypePair : vcASTTypePairs) { - obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo( - SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null, - false)); - vcolPos++; - } - obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst), - obSyntheticProjectRR, srcRel); + // 2.4 Determine the Direction of order by + RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; + if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { + order = RelFieldCollation.Direction.ASCENDING; + } + RelFieldCollation.NullDirection nullOrder; + if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_FIRST) { + nullOrder = RelFieldCollation.NullDirection.FIRST; + } else if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_LAST) { + nullOrder = RelFieldCollation.NullDirection.LAST; + } else { + throw new SemanticException("Unexpected null ordering option: " + + nullObASTExpr.getType()); + } - if (outermostOB) { - if (!RowResolver.add(outputRR, inputRR)) { - throw new CalciteSemanticException( + // 2.5 Add to field collations + fieldCollations.add(new RelFieldCollation(fieldIndex, order, nullOrder)); + } + + // 3. 
Add Child Project Rel if needed, Generate Output RR, input Sel Rel + // for top constraining Sel + RelNode obInputRel = srcRel; + if (!newVCLst.isEmpty()) { + List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), + new Function() { + @Override + public RexNode apply(RelDataTypeField input) { + return new RexInputRef(input.getIndex(), input.getType()); + } + }); + RowResolver obSyntheticProjectRR = new RowResolver(); + if (!RowResolver.add(obSyntheticProjectRR, inputRR)) { + throw new CalciteSemanticException( "Duplicates detected when adding columns to RR: see previous message", UnsupportedFeature.Duplicates_in_RR); - } + } + int vcolPos = inputRR.getRowSchema().getSignature().size(); + for (Pair astTypePair : vcASTTypePairs) { + obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo( + SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null, + false)); + vcolPos++; + } + obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst), + obSyntheticProjectRR, srcRel); - } else { - if (!RowResolver.add(outputRR, obSyntheticProjectRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message", - UnsupportedFeature.Duplicates_in_RR); - } + if (outermostOB) { + if (!RowResolver.add(outputRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); } + } else { - if (!RowResolver.add(outputRR, inputRR)) { + if (!RowResolver.add(outputRR, obSyntheticProjectRR)) { throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message", - UnsupportedFeature.Duplicates_in_RR); + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); } } + } else { + if (!RowResolver.add(outputRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); + } + } + return new OBLogicalPlanGenState(obInputRel, fieldCollations, selectOutputRR, outputRR, srcRel); + } - // 4. Construct SortRel - RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); - RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations)); - sortRel = new HiveSortLimit(cluster, traitSet, obInputRel, canonizedCollation, null, null); - - // 5. Update the maps - // NOTE: Output RR for SortRel is considered same as its input; we may - // end up not using VC that is present in sort rel. Also note that - // rowtype of sortrel is the type of it child; if child happens to be - // synthetic project that we introduced then that projectrel would - // contain the vc. - ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR); - relToHiveRR.put(sortRel, outputRR); - relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); - - if (selectOutputRR != null) { - List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), - new Function() { - @Override - public RexNode apply(RelDataTypeField input) { - return new RexInputRef(input.getIndex(), input.getType()); - } - }); - List selectedRefs = Lists.newArrayList(); - for (int index = 0; index < selectOutputRR.getColumnInfos().size(); index++) { - selectedRefs.add(originalInputRefs.get(index)); - } - // We need to add select since order by schema may have more columns than result schema. 
- returnRel = genSelectRelNode(selectedRefs, selectOutputRR, sortRel); - } else { - returnRel = sortRel; + public RelNode endGenOBLogicalPlan(OBLogicalPlanGenState obLogicalPlanGenState, RelNode sortRel) throws CalciteSemanticException { + + // 5. Update the maps + // NOTE: Output RR for SortRel is considered same as its input; we may + // end up not using VC that is present in sort rel. Also note that + // rowtype of sortrel is the type of it child; if child happens to be + // synthetic project that we introduced then that projectrel would + // contain the vc. + ImmutableMap<String, Integer> hiveColNameCalcitePosMap = + buildHiveToCalciteColumnMap(obLogicalPlanGenState.getOutputRR()); + relToHiveRR.put(sortRel, obLogicalPlanGenState.getOutputRR()); + relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); + + if (obLogicalPlanGenState.getSelectOutputRR() != null) { + List<RexNode> originalInputRefs = Lists.transform(obLogicalPlanGenState.getSrcRel().getRowType().getFieldList(), + new Function<RelDataTypeField, RexNode>() { + @Override + public RexNode apply(RelDataTypeField input) { + return new RexInputRef(input.getIndex(), input.getType()); + } + }); + List<RexNode> selectedRefs = Lists.newArrayList(); + for (int index = 0; index < obLogicalPlanGenState.getSelectOutputRR().getColumnInfos().size(); index++) { + selectedRefs.add(originalInputRefs.get(index)); } + // We need to add select since order by schema may have more columns than result schema. + return genSelectRelNode(selectedRefs, obLogicalPlanGenState.getSelectOutputRR(), sortRel); + } else { + return sortRel; } - return returnRel; } private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { @@ -4722,6 +4766,7 @@ private void setQueryHints(QB qb) throws SemanticException { } else { String dest = qbp.getClauseNames().iterator().next(); ASTNode obAST = qbp.getOrderByForClause(dest); + ASTNode sbAST = qbp.getSortByForClause(dest); RowResolver originalRR = null; // We only support limited unselected column following by order by. @@ -4732,7 +4777,7 @@ // If DISTINCT is present, it is not possible to ORDER BY unselected // columns, and in fact adding all columns would change the behavior of // DISTINCT, so we bypass this logic. - if (obAST != null + if ((obAST != null || sbAST != null) && selExprList.getToken().getType() != HiveParser.TOK_SELECTDI && !isAllColRefRewrite) { // 1. OB Expr sanity test @@ -4957,6 +5002,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, RelNode gbHavingRel = null; RelNode selectRel = null; RelNode obRel = null; + RelNode sbRel = null; RelNode limitRel = null; // First generate all the opInfos for the elements in the from clause @@ -5048,6 +5094,10 @@ obRel = genOBLogicalPlan(qb, selPair, outerMostQB); srcRel = (obRel == null) ? srcRel : obRel; + // 6.1 Build Rel for SB Clause + sbRel = genSBLogicalPlan(qb, selPair, outerMostQB); + srcRel = (sbRel == null) ? srcRel : sbRel; + // 7. Build Rel for Limit Clause limitRel = genLimitLogicalPlan(qb, srcRel); srcRel = (limitRel == null) ? 
srcRel : limitRel; @@ -5195,6 +5245,43 @@ private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { } } + private static class OBLogicalPlanGenState { + private final RelNode obInputRel; + private final List<RelFieldCollation> canonizedCollation; + private final RowResolver selectOutputRR; + private final RowResolver outputRR; + private final RelNode srcRel; + + public OBLogicalPlanGenState(RelNode obInputRel, List<RelFieldCollation> canonizedCollation, + RowResolver selectOutputRR, RowResolver outputRR, RelNode srcRel) { + this.obInputRel = obInputRel; + this.canonizedCollation = canonizedCollation; + this.selectOutputRR = selectOutputRR; + this.outputRR = outputRR; + this.srcRel = srcRel; + } + + public RelNode getObInputRel() { + return obInputRel; + } + + public List<RelFieldCollation> getCanonizedCollation() { + return canonizedCollation; + } + + public RowResolver getSelectOutputRR() { + return selectOutputRR; + } + + public RowResolver getOutputRR() { + return outputRR; + } + + public RelNode getSrcRel() { + return srcRel; + } + } + @Override protected Table getTableObjectByName(String tabName, boolean throwException) throws HiveException { String[] names = Utilities.getDbTableName(tabName); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java index 31068cb8c3..7b25030442 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java @@ -52,7 +52,7 @@ * statements (since they are actually inserts) and then doing some patch up to make them work as * updates and deletes instead. */ -public abstract class RewriteSemanticAnalyzer extends SemanticAnalyzer { +public abstract class RewriteSemanticAnalyzer extends CalcitePlanner { protected static final Logger LOG = LoggerFactory.getLogger(RewriteSemanticAnalyzer.class); protected boolean useSuper = false; diff --git ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q index be50b69830..f228ccd609 100644 --- ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q +++ ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q @@ -64,7 +64,7 @@ set hive.cbo.enable=true; --although cbo is enabled, it will not succeed. -select key from v_n10 sort by key limit 10; +select key from v_n10 cluster by key limit 10; select key from (select key as key from src_autho_test_n9 union all select key from v_n10 cluster by key)subq diff --git ql/src/test/queries/clientpositive/sort.q ql/src/test/queries/clientpositive/sort.q index cab2712810..93b2faf8bd 100644 --- ql/src/test/queries/clientpositive/sort.q +++ ql/src/test/queries/clientpositive/sort.q @@ -1,7 +1,11 @@ --! 
qt:dataset:src -- SORT_QUERY_RESULTS -EXPLAIN -SELECT x.* FROM SRC x SORT BY key; +SELECT x.key, x.value FROM SRC x ORDER BY 1; + +SELECT x.key, x.value FROM SRC x SORT BY 1; + +--EXPLAIN +--SELECT x.* FROM SRC x SORT BY key; SELECT x.* FROM SRC x SORT BY key; diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index b82a055079..5331200522 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -271,7 +271,7 @@ POSTHOOK: query: drop table src_orc_merge_test_part_n1 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@src_orc_merge_test_part_n1 POSTHOOK: Output: default@src_orc_merge_test_part_n1 -Warning: Shuffle Join MERGEJOIN[18][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( select src1.key as k1, src1.value as v1, @@ -296,7 +296,7 @@ select src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Plan not optimized by CBO because the statement has sort by +Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) @@ -313,25 +313,27 @@ Stage-0 <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_12] Group By Operator [GBY_11] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(hash(_col0,_col1,_col2,_col3))"] - Merge Join Operator [MERGEJOIN_18] (rows=27556 width=356) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 1 [XPROD_EDGE] llap - XPROD_EDGE [RS_6] - Select Operator [SEL_2] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=166 width=178) - predicate:(key < 10) - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 4 [XPROD_EDGE] llap - XPROD_EDGE [RS_7] - Select Operator [SEL_5] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=166 width=178) - predicate:(key < 10) - TableScan [TS_3] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Output:["_col0"],aggregations:["sum(_col0)"] + Select Operator [SEL_9] (rows=27556 width=356) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_18] (rows=27556 width=356) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 1 [XPROD_EDGE] llap + XPROD_EDGE [RS_6] + Select Operator [SEL_2] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_16] (rows=166 width=178) + predicate:(UDFToDouble(key) < 10.0D) + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [XPROD_EDGE] llap + XPROD_EDGE [RS_7] + Select Operator [SEL_5] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_17] (rows=166 width=178) + predicate:(UDFToDouble(key) < 10.0D) + TableScan [TS_3] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key PREHOOK: type: QUERY @@ -3716,11 +3718,11 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_CTAS1_n1 -Plan not optimized by CBO because the statement has sort by 
+Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Stage-3 @@ -3731,36 +3733,34 @@ Stage-3 Move Operator Stage-1 Reducer 4 llap - File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=880) + File Output Operator [FS_18] + Group By Operator [GBY_16] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_8] + File Output Operator [FS_9] table:{"name:":"default.nzhang_CTAS1_n1"} - Limit [LIM_7] (rows=10 width=178) + Limit [LIM_8] (rows=10 width=178) Number of rows:10 - Select Operator [SEL_6] (rows=10 width=178) + Select Operator [SEL_7] (rows=10 width=178) Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_5] - Top N Key Operator [TNK_18] (rows=10 width=178) - keys:_col0, _col1,top n:10 - Limit [LIM_4] (rows=10 width=178) - Number of rows:10 - Select Operator [SEL_3] (rows=500 width=178) - Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] - Top N Key Operator [TNK_19] (rows=500 width=178) - keys:key, value,top n:10 - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_14] - Select Operator [SEL_13] (rows=10 width=178) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_6] + Limit [LIM_5] (rows=10 width=178) + Number of rows:10 + Select Operator [SEL_3] (rows=500 width=178) + Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_19] (rows=500 width=178) + keys:key, value,top n:10 + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_15] + Select Operator [SEL_14] (rows=10 width=178) Output:["col1","col2"] - Please refer to the previous Limit [LIM_7] + Please refer to the previous Limit [LIM_8] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -3787,11 +3787,11 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_ctas3_n1 -Plan not optimized by CBO because the statement has sort by +Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Stage-3 @@ -3802,36 +3802,34 @@ Stage-3 Move Operator Stage-1 Reducer 4 llap - File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=880) + File Output Operator [FS_18] + Group By Operator [GBY_16] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - File Output Operator [FS_8] + File Output Operator [FS_9] table:{"name:":"default.nzhang_ctas3_n1"} - Limit [LIM_7] (rows=10 width=192) + Limit [LIM_8] (rows=10 width=192) Number of rows:10 - Select Operator [SEL_6] (rows=10 width=192) + Select Operator [SEL_7] (rows=10 width=192) Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_5] - Top N Key Operator [TNK_18] (rows=10 width=192) - keys:_col0, _col1,top n:10 - Limit [LIM_4] (rows=10 width=192) - Number of rows:10 - Select Operator [SEL_3] (rows=500 width=192) - Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500 width=192) - Output:["_col0","_col1"] - Top N Key Operator [TNK_19] (rows=500 width=178) - keys:(key / 2), concat(value, '_con'),top n:10 - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_14] - Select Operator [SEL_13] (rows=10 width=192) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_6] + Limit [LIM_5] (rows=10 width=192) + Number of rows:10 + Select Operator [SEL_3] (rows=500 width=192) + Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500 width=192) + Output:["_col0","_col1"] + Top N Key Operator [TNK_19] (rows=500 width=178) + keys:(UDFToDouble(key) / 2.0D), concat(value, '_con'),top n:10 + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + PARTITION_ONLY_SHUFFLE [RS_15] + Select Operator [SEL_14] (rows=10 width=192) Output:["col1","col2"] - Please refer to the previous Limit [LIM_7] + Please refer to the previous Limit [LIM_8] Stage-2 Dependency Collection{} Please refer to the previous Stage-1 @@ -3896,7 +3894,7 @@ select src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Plan not optimized by CBO because the statement has sort by +Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) @@ -3919,7 +3917,7 @@ Stage-0 Select Operator [SEL_2] (rows=166 width=178) Output:["_col0","_col1"] Filter Operator [FIL_13] (rows=166 width=178) - predicate:(key < 10) + predicate:(UDFToDouble(key) < 10.0D) TableScan [TS_0] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 4 [CUSTOM_SIMPLE_EDGE] llap @@ -3927,7 +3925,7 @@ Stage-0 Select Operator [SEL_5] (rows=166 width=178) Output:["_col0","_col1"] Filter Operator [FIL_14] (rows=166 width=178) - predicate:(key < 10) + predicate:(UDFToDouble(key) < 10.0D) TableScan [TS_3] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 2c9c7aa8ac..30a812cf65 100644 --- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -268,7 +268,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -319,50 +318,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 1, 4, 5, 0] - Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 25 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col5 (type: int), _col2 (type: date) - null sort order: zz - Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE - top n: 25 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int, col 1:date - native: true - Reduce Output Operator - key expressions: _col5 (type: int), _col2 (type: date) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -378,7 +333,7 @@ STAGE PLANS: className: VectorSelectOperator native: true 
projectedOutputColumnNums: [2, 3, 1, 4, 5, 0] - Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: @@ -1259,7 +1214,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1310,50 +1264,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 25 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col4 (type: int), _col5 (type: date) - null sort order: zz - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - top n: 25 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int, col 1:date - native: true - Reduce Output Operator - key expressions: _col4 (type: int), _col5 (type: date) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1369,7 +1279,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: @@ -2322,7 +2232,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2373,50 +2282,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator 
Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 25 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col4 (type: int), _col5 (type: timestamp) - null sort order: zz - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - top n: 25 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int, col 1:timestamp - native: true - Reduce Output Operator - key expressions: _col4 (type: int), _col5 (type: timestamp) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2432,7 +2297,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: @@ -2941,7 +2806,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2992,50 +2856,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 1, 4, 5, 0] - Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 25 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col5 (type: int), _col2 (type: date) - null sort order: zz - Statistics: Num rows: 25 Data size: 7200 Basic stats: 
COMPLETE Column stats: COMPLETE - top n: 25 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int, col 1:date - native: true - Reduce Output Operator - key expressions: _col5 (type: int), _col2 (type: date) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -3051,7 +2871,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 1, 4, 5, 0] - Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: @@ -3932,7 +3752,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3983,50 +3802,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 25 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col4 (type: int), _col5 (type: date) - null sort order: zz - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - top n: 25 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int, col 1:date - native: true - Reduce Output Operator - key expressions: _col4 (type: int), _col5 (type: date) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: 
float) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -4042,7 +3817,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: @@ -4995,7 +4770,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5046,50 +4820,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 25 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col4 (type: int), _col5 (type: timestamp) - null sort order: zz - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - top n: 25 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int, col 1:timestamp - native: true - Reduce Output Operator - key expressions: _col4 (type: int), _col5 (type: timestamp) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -5105,7 +4835,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: diff --git ql/src/test/results/clientpositive/sort.q.out ql/src/test/results/clientpositive/sort.q.out index 42cbd83fdc..8d8b827aba 100644 --- ql/src/test/results/clientpositive/sort.q.out +++ ql/src/test/results/clientpositive/sort.q.out @@ -1,54 +1,1019 @@ -PREHOOK: query: EXPLAIN -SELECT x.* FROM SRC x SORT BY key +PREHOOK: query: SELECT x.key, x.value FROM SRC x ORDER BY 1 PREHOOK: type: QUERY 
PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT x.* FROM SRC x SORT BY key +POSTHOOK: query: SELECT x.key, x.value FROM SRC x ORDER BY 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +222 val_222 +223 
val_223 +223 val_223 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 
+478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: SELECT x.key, x.value FROM SRC x SORT BY 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.key, x.value FROM SRC x SORT BY 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 
+26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 
+67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 PREHOOK: query: SELECT x.* FROM SRC x SORT BY key PREHOOK: type: QUERY PREHOOK: Input: default@src