diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index e03e96ff12..a56e0cbe77 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -32,6 +32,7 @@
 import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.Aggregate.Group;
 import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.Exchange;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
@@ -62,6 +63,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
@@ -88,7 +90,7 @@
   private Aggregate groupBy;
   private Filter having;
   private RelNode select;
-  private Sort orderLimit;
+  private RelNode orderLimit;
 
   private Schema schema;
 
@@ -255,7 +257,7 @@ private ASTNode convert() throws CalciteSemanticException {
      * its parent.
      * 8. Limit
      */
-    convertOrderLimitToASTNode((HiveSortLimit) orderLimit);
+    convertOrderToASTNode(orderLimit);
 
     return hiveAST.getAST();
   }
@@ -275,75 +277,109 @@ private ASTNode buildUDTFAST(String functionName, List<ASTNode> children) {
     return node;
   }
 
-  private void convertOrderLimitToASTNode(HiveSortLimit order) {
-    if (order != null) {
-      HiveSortLimit hiveSortLimit = order;
-      if (!hiveSortLimit.getCollation().getFieldCollations().isEmpty()) {
-        // 1 Add order by token
-        ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY");
-
-        schema = new Schema(hiveSortLimit);
-        Map<Integer, RexNode> obRefToCallMap = hiveSortLimit.getInputRefToCallMap();
-        RexNode obExpr;
-        ASTNode astCol;
-        for (RelFieldCollation c : hiveSortLimit.getCollation().getFieldCollations()) {
-
-          // 2 Add Direction token
-          ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder
-              .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder
-              .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC");
-          ASTNode nullDirectionAST;
-          // Null direction
-          if (c.nullDirection == RelFieldCollation.NullDirection.FIRST) {
-            nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST");
-            directionAST.addChild(nullDirectionAST);
-          } else if (c.nullDirection == RelFieldCollation.NullDirection.LAST) {
-            nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST");
-            directionAST.addChild(nullDirectionAST);
-          } else {
-            // Default
-            if (c.getDirection() == RelFieldCollation.Direction.ASCENDING) {
-              nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST");
-              directionAST.addChild(nullDirectionAST);
-            } else {
-              nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST");
-              directionAST.addChild(nullDirectionAST);
-            }
-          }
-
-          // 3 Convert OB expr (OB Expr is usually an input ref except for top
-          // level OB; top level OB will have RexCall kept in a map.)
-          obExpr = null;
-          if (obRefToCallMap != null) {
-            obExpr = obRefToCallMap.get(c.getFieldIndex());
-          }
-
-          if (obExpr != null) {
-            astCol = obExpr.accept(new RexVisitor(schema, false, order.getCluster().getRexBuilder()));
-          } else {
-            ColumnInfo cI = schema.get(c.getFieldIndex());
-            /*
-             * The RowResolver setup for Select drops Table associations. So
-             * setup ASTNode on unqualified name.
-             */
-            astCol = ASTBuilder.unqualifiedName(cI.column);
-          }
-
-          // 4 buildup the ob expr AST
-          nullDirectionAST.addChild(astCol);
-          orderAst.addChild(directionAST);
-        }
-        hiveAST.order = orderAst;
-      }
-      RexNode offsetExpr = hiveSortLimit.getOffsetExpr();
-      RexNode fetchExpr = hiveSortLimit.getFetchExpr();
-      if (fetchExpr != null) {
-        Object offset = (offsetExpr == null) ? Integer.valueOf(0) : ((RexLiteral) offsetExpr).getValue2();
-        Object fetch = ((RexLiteral) fetchExpr).getValue2();
-        hiveAST.limit = ASTBuilder.limit(offset, fetch);
-      }
-    }
-  }
+  private void convertOrderToASTNode(RelNode node) {
+    if (node == null) {
+      return;
+    }
+
+    if (node instanceof HiveSortLimit) {
+      convertOrderLimitToASTNode((HiveSortLimit) node);
+    } else if (node instanceof HiveSortExchange) {
+      convertSortToASTNode((HiveSortExchange) node);
+    }
+  }
+
+  private void convertOrderLimitToASTNode(HiveSortLimit hiveSortLimit) {
+    List<RelFieldCollation> fieldCollations = hiveSortLimit.getCollation().getFieldCollations();
+    Schema schema = new Schema(hiveSortLimit);
+    convertFieldCollationsToASTNode(hiveSortLimit, schema, fieldCollations, hiveSortLimit.getInputRefToCallMap(),
+        HiveParser.TOK_ORDERBY, "TOK_ORDERBY");
+
+    RexNode offsetExpr = hiveSortLimit.getOffsetExpr();
+    RexNode fetchExpr = hiveSortLimit.getFetchExpr();
+    if (fetchExpr != null) {
+      Object offset = (offsetExpr == null) ? Integer.valueOf(0) : ((RexLiteral) offsetExpr).getValue2();
+      Object fetch = ((RexLiteral) fetchExpr).getValue2();
+      hiveAST.limit = ASTBuilder.limit(offset, fetch);
+    }
+  }
+
+  private void convertSortToASTNode(HiveSortExchange hiveSortExchange) {
+    List<RelFieldCollation> fieldCollations = hiveSortExchange.getCollation().getFieldCollations();
+    Schema schema = new Schema(hiveSortExchange);
+    convertFieldCollationsToASTNode(hiveSortExchange, schema, fieldCollations, null,
+        HiveParser.TOK_SORTBY, "TOK_SORTBY");
+
+    // TODO: sort by limit
+//    RexNode offsetExpr = hiveSortExchange.getOffsetExpr();
+//    RexNode fetchExpr = hiveSortExchange.getFetchExpr();
+//    if (fetchExpr != null) {
+//      Object offset = (offsetExpr == null) ? Integer.valueOf(0) : ((RexLiteral) offsetExpr).getValue2();
+//      Object fetch = ((RexLiteral) fetchExpr).getValue2();
+//      hiveAST.limit = ASTBuilder.limit(offset, fetch);
+//    }
+  }
+
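+  // Shared by the ORDER BY (HiveSortLimit) and SORT BY (HiveSortExchange)
+  // paths above: walks the field collations once and builds the TOK_ORDERBY /
+  // TOK_SORTBY subtree; only the wrapping token differs between the two.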
+  private void convertFieldCollationsToASTNode(
+      RelNode node, Schema schema, List<RelFieldCollation> fieldCollations, Map<Integer, RexNode> obRefToCallMap,
+      int astToken, String astText) {
+    if (fieldCollations.isEmpty()) {
+      return;
+    }
+
+    // 1 Add order/sort by token
+    ASTNode orderAst = ASTBuilder.createAST(astToken, astText);
+
+    RexNode obExpr;
+    ASTNode astCol;
+    for (RelFieldCollation c : fieldCollations) {
+
+      // 2 Add Direction token
+      ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder
+          .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder
+          .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC");
+      ASTNode nullDirectionAST;
+      // Null direction
+      if (c.nullDirection == RelFieldCollation.NullDirection.FIRST) {
+        nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST");
+        directionAST.addChild(nullDirectionAST);
+      } else if (c.nullDirection == RelFieldCollation.NullDirection.LAST) {
+        nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST");
+        directionAST.addChild(nullDirectionAST);
+      } else {
+        // Default
+        if (c.getDirection() == RelFieldCollation.Direction.ASCENDING) {
+          nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST");
+          directionAST.addChild(nullDirectionAST);
+        } else {
+          nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST");
+          directionAST.addChild(nullDirectionAST);
+        }
+      }
+
+      // 3 Convert OB expr (OB Expr is usually an input ref except for top
+      // level OB; top level OB will have RexCall kept in a map.)
+      obExpr = null;
+      if (obRefToCallMap != null) {
+        obExpr = obRefToCallMap.get(c.getFieldIndex());
+      }
+
+      if (obExpr != null) {
+        astCol = obExpr.accept(new RexVisitor(schema, false, node.getCluster().getRexBuilder()));
+      } else {
+        ColumnInfo cI = schema.get(c.getFieldIndex());
+        /*
+         * The RowResolver setup for Select drops Table associations. So
+         * setup ASTNode on unqualified name.
+         */
+        astCol = ASTBuilder.unqualifiedName(cI.column);
+      }
+
+      // 4 buildup the ob expr AST
+      nullDirectionAST.addChild(astCol);
+      orderAst.addChild(directionAST);
+    }
+    hiveAST.order = orderAst;
+  }
 
 private Schema getRowSchema(String tblAlias) {
@@ -463,11 +499,11 @@ public void visit(RelNode node, int ordinal, RelNode parent) {
       ASTConverter.this.from = node;
     } else if (node instanceof Aggregate) {
       ASTConverter.this.groupBy = (Aggregate) node;
-    } else if (node instanceof Sort) {
+    } else if (node instanceof Sort || node instanceof Exchange) {
       if (ASTConverter.this.select != null) {
         ASTConverter.this.from = node;
       } else {
-        ASTConverter.this.orderLimit = (Sort) node;
+        ASTConverter.this.orderLimit = node;
       }
     }
     /*
@@ -879,6 +915,14 @@ public Schema(HiveSortLimit order) {
     }
   }
 
+  // TODO: copy-paste of Schema(HiveSortLimit); consider extracting the shared projection walk
+  public Schema(HiveSortExchange sort) {
+    Project select = (Project) sort.getInput();
+    for (String projName : select.getRowType().getFieldNames()) {
+      add(new ColumnInfo(null, projName));
+    }
+  }
+
   public Schema(String tabAlias, List<RelDataTypeField> fieldList) {
     for (RelDataTypeField field : fieldList) {
       add(new ColumnInfo(tabAlias, field.getName()));
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index 31619c0314..06e311bdd1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -28,6 +28,7 @@
 import org.apache.calcite.rel.SingleRel;
 import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.Exchange;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.Project;
@@ -47,6 +48,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
@@ -70,7 +72,7 @@ public static RelNode convertOpTree(RelNode rel, List<FieldSchema> resultSchema,
       LOG.debug("Original plan for PlanModifier\n " + RelOptUtil.toString(newTopNode));
     }
 
-    if (!(newTopNode instanceof Project) && !(newTopNode instanceof Sort)) {
+    if (!(newTopNode instanceof Project) && !(newTopNode instanceof Sort) && !(newTopNode instanceof Exchange)) {
       newTopNode = introduceDerivedTable(newTopNode);
       if (LOG.isDebugEnabled()) {
         LOG.debug("Plan after top-level introduceDerivedTable\n "
@@ -174,6 +176,13 @@ private static void convertOpTree(RelNode rel, RelNode parent) {
       if (!validSortChild((HiveSortLimit) rel)) {
         introduceDerivedTable(((HiveSortLimit) rel).getInput(), rel);
       }
+    } else if (rel instanceof HiveSortExchange) {
+      if (!validExchangeParent(rel, parent)) {
+        introduceDerivedTable(rel, parent);
+      }
+      if (!validExchangeChild((HiveSortExchange) rel)) {
+        introduceDerivedTable(((HiveSortExchange) rel).getInput(), rel);
+      }
     } else if (rel instanceof HiveAggregate) {
       RelNode newParent = parent;
       if (!validGBParent(rel, parent)) {
@@ -357,6 +366,28 @@ private static boolean validSortChild(HiveSortLimit sortNode) {
     return validChild;
   }
 
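+  // Mirrors validSortParent/validSortChild above: a HiveSortExchange may stay
+  // in place only when its parent/child shape is one the AST converter can
+  // translate directly; otherwise a derived table is wrapped around it.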
+  private static boolean validExchangeParent(RelNode sortNode, RelNode parent) {
+    boolean validParent = true;
+
+    if (parent != null && !(parent instanceof Project) &&
+        !(HiveCalciteUtil.pureLimitRelNode(parent) && HiveCalciteUtil.pureOrderRelNode(sortNode))) {
+      validParent = false;
+    }
+
+    return validParent;
+  }
+
+  private static boolean validExchangeChild(HiveSortExchange sortNode) {
+    boolean validChild = true;
+    RelNode child = sortNode.getInput();
+
+    if (!(child instanceof Project) &&
+        !(HiveCalciteUtil.pureLimitRelNode(sortNode) && HiveCalciteUtil.pureOrderRelNode(child))) {
+      validChild = false;
+    }
+
+    return validChild;
+  }
+
   private static boolean validSetopParent(RelNode setop, RelNode parent) {
     boolean validChild = true;
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 537355f7ed..4031d1f6ef 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -60,6 +60,7 @@
 import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelCollationImpl;
 import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelDistribution;
 import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelVisitor;
@@ -151,6 +152,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
@@ -900,7 +902,7 @@ private static String canHandleQbForCbo(QueryProperties queryProperties, HiveCon
     boolean topLevelQB, boolean verbose) {
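+    // SORT BY no longer disqualifies a query from CBO: it is now planned
+    // through Calcite as a HiveSortExchange (see genSBLogicalPlan below);
+    // cluster by and distribute by still fall back to the legacy path.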
     if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
-        && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript()
+        && !queryProperties.hasPTF() && !queryProperties.usesScript()
         && queryProperties.isCBOSupportedLateralViews()) {
       // Ok to run CBO.
       return null;
@@ -915,9 +917,6 @@ private static String canHandleQbForCbo(QueryProperties queryProperties, HiveCon
     if (queryProperties.hasDistributeBy()) {
       msg += "has distribute by; ";
     }
-    if (queryProperties.hasSortBy()) {
-      msg += "has sort by; ";
-    }
     if (queryProperties.hasPTF()) {
       msg += "has PTF; ";
     }
@@ -3888,8 +3887,274 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
     return gbRel;
   }
 
+  private RelNode genOBLogicalPlan(QB qb, Pair<RelNode, RowResolver> selPair,
+      boolean outermostOB) throws SemanticException {
+    // false: analyze the ORDER BY clause (genSBLogicalPlan passes true for SORT BY)
+    OBLogicalPlan obLogicalPlan = beginGenOBLogicalPlan(qb, selPair, outermostOB, false);
+    if (obLogicalPlan == null) {
+      return null;
+    }
+
+    RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
+    RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(obLogicalPlan.getCanonizedCollation()));
+    RelNode sortRel = new HiveSortLimit(
+        cluster, traitSet, obLogicalPlan.getObInputRel(), canonizedCollation, null, null);
+
+    return endGenOBLogicalPlan(obLogicalPlan, sortRel);
+  }
+
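+  // SORT BY planning: reuse the ORDER BY analysis, then wrap the input in a
+  // HiveSortExchange that is hash-distributed on the sort keys, so rows with
+  // equal keys meet in one reducer and are sorted per reducer; unlike ORDER BY,
+  // no single total order (and no limit, yet) is imposed.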
+  private RelNode genSBLogicalPlan(QB qb, Pair<RelNode, RowResolver> selPair,
+      boolean outermostOB) throws SemanticException {
+    OBLogicalPlan obLogicalPlan = beginGenOBLogicalPlan(qb, selPair, outermostOB, true);
+    if (obLogicalPlan == null) {
+      return null;
+    }
+
+    RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
+    RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(obLogicalPlan.getCanonizedCollation()));
+    List<Integer> joinKeyPositions = new ArrayList<>(canonizedCollation.getFieldCollations().size());
+    ImmutableList.Builder<RexNode> builder = ImmutableList.builder();
+    for (RelFieldCollation relFieldCollation : canonizedCollation.getFieldCollations()) {
+      int index = relFieldCollation.getFieldIndex();
+      joinKeyPositions.add(index);
+      builder.add(cluster.getRexBuilder().makeInputRef(obLogicalPlan.getObInputRel(), index));
+    }
+
+    RelNode sortRel = HiveSortExchange.create(
+        obLogicalPlan.getObInputRel(),
+        new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, joinKeyPositions),
+        canonizedCollation,
+        builder.build());
+
+    return endGenOBLogicalPlan(obLogicalPlan, sortRel);
+  }
+
   /**
-   * Generate OB RelNode and input Select RelNode that should be used to
+   * Generate OB/SB RelNode and input Select RelNode that should be used to
    * introduce top constraining Project. If Input select RelNode is not
    * present then don't introduce top constraining select.
    *
@@ -3899,7 +4164,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
    * @return RelNode OB RelNode
    * @throws SemanticException
    */
-  private RelNode genOBLogicalPlan(QB qb, Pair<RelNode, RowResolver> selPair,
-      boolean outermostOB) throws SemanticException {
+  private OBLogicalPlan beginGenOBLogicalPlan(QB qb, Pair<RelNode, RowResolver> selPair,
+      boolean outermostOB, boolean sortBy) throws SemanticException {
@@ -3907,210 +4172,215 @@ private RelNode genOBLogicalPlan(QB qb, Pair<RelNode, RowResolver> selPair,
     // selPair.getKey() is the operator right before OB
     // selPair.getValue() is RR which only contains columns needed in result
     // set. Extra columns needed by order by will be absent from it.
     RelNode srcRel = selPair.getKey();
     RowResolver selectOutputRR = selPair.getValue();
     RelNode sortRel = null;
-    RelNode returnRel = null;
 
     QBParseInfo qbp = getQBParseInfo(qb);
     String dest = qbp.getClauseNames().iterator().next();
-    ASTNode obAST = qbp.getOrderByForClause(dest);
-
-    if (obAST != null) {
-      // 1. OB Expr sanity test
-      // in strict mode, in the presence of order by, limit must be
-      // specified
-      Integer limit = qb.getParseInfo().getDestLimit(dest);
-      if (limit == null) {
-        String error = StrictChecks.checkNoLimit(conf);
-        if (error != null) {
-          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error));
-        }
-      }
-
-      // 2. Walk through OB exprs and extract field collations and additional
-      // virtual columns needed
-      final List<RexNode> newVCLst = new ArrayList<RexNode>();
-      final List<RelFieldCollation> fieldCollations = Lists.newArrayList();
-      int fieldIndex = 0;
-
-      List<Node> obASTExprLst = obAST.getChildren();
-      ASTNode obASTExpr;
-      ASTNode nullObASTExpr;
-      List<Pair<ASTNode, TypeInfo>> vcASTTypePairs = new ArrayList<Pair<ASTNode, TypeInfo>>();
-      RowResolver inputRR = relToHiveRR.get(srcRel);
-      RowResolver outputRR = new RowResolver();
-
-      RexNode rnd;
-      RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(),
-          relToHiveColNameCalcitePosMap.get(srcRel), 0, false);
-      int srcRelRecordSz = srcRel.getRowType().getFieldCount();
-
-      for (int i = 0; i < obASTExprLst.size(); i++) {
-        // 2.1 Convert AST Expr to ExprNode
-        obASTExpr = (ASTNode) obASTExprLst.get(i);
-        nullObASTExpr = (ASTNode) obASTExpr.getChild(0);
-        ASTNode ref = (ASTNode) nullObASTExpr.getChild(0);
-        Map<ASTNode, ExprNodeDesc> astToExprNDescMap = null;
-        ExprNodeDesc obExprNDesc = null;
-
-        boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS);
-        boolean isObyByPos = isBothByPos
-            || HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS);
-        // replace each of the position alias in ORDERBY with the actual column
-        if (ref != null && ref.getToken().getType() == HiveParser.Number) {
-          if (isObyByPos) {
-            int pos = Integer.parseInt(ref.getText());
-            if (pos > 0 && pos <= selectOutputRR.getColumnInfos().size()) {
-              // fieldIndex becomes so simple
-              // Note that pos starts from 1 while fieldIndex starts from 0;
-              fieldIndex = pos - 1;
-            } else {
-              throw new SemanticException(
-                  ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg("Position alias: " + pos
-                      + " does not exist\n" + "The Select List is indexed from 1 to "
-                      + selectOutputRR.getColumnInfos().size()));
-            }
-          } else { // if not using position alias and it is a number.
-            LOG.warn("Using constant number "
-                + ref.getText()
-                + " in order by. If you try to use position alias when hive.orderby.position.alias is false, the position alias will be ignored.");
-          }
-        } else {
-          // first try to get it from select
-          // in case of udtf, selectOutputRR may be null.
-          if (selectOutputRR != null) {
-            try {
-              astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR);
-              obExprNDesc = astToExprNDescMap.get(ref);
-            } catch (SemanticException ex) {
-              // we can tolerate this as this is the previous behavior
-              LOG.debug("Can not find column in " + ref.getText() + ". The error msg is "
-                  + ex.getMessage());
-            }
-          }
-          // then try to get it from all
-          if (obExprNDesc == null) {
-            astToExprNDescMap = genAllExprNodeDesc(ref, inputRR);
-            obExprNDesc = astToExprNDescMap.get(ref);
-          }
-          if (obExprNDesc == null) {
-            throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
-          }
-          // 2.2 Convert ExprNode to RexNode
-          rnd = converter.convert(obExprNDesc);
-
-          // 2.3 Determine the index of ob expr in child schema
-          // NOTE: Calcite can not take compound exprs in OB without it being
-          // present in the child (& hence we add a child Project Rel)
-          if (rnd instanceof RexInputRef) {
-            fieldIndex = ((RexInputRef) rnd).getIndex();
-          } else {
-            fieldIndex = srcRelRecordSz + newVCLst.size();
-            newVCLst.add(rnd);
-            vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>(ref, obExprNDesc.getTypeInfo()));
-          }
-        }
-
-        // 2.4 Determine the Direction of order by
-        RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING;
-        if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
-          order = RelFieldCollation.Direction.ASCENDING;
-        }
-        RelFieldCollation.NullDirection nullOrder;
-        if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_FIRST) {
-          nullOrder = RelFieldCollation.NullDirection.FIRST;
-        } else if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_LAST) {
-          nullOrder = RelFieldCollation.NullDirection.LAST;
-        } else {
-          throw new SemanticException("Unexpected null ordering option: "
-              + nullObASTExpr.getType());
-        }
-
-        // 2.5 Add to field collations
-        fieldCollations.add(new RelFieldCollation(fieldIndex, order, nullOrder));
-      }
-
-      // 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel
-      // for top constraining Sel
-      RelNode obInputRel = srcRel;
-      if (!newVCLst.isEmpty()) {
-        List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
-            new Function<RelDataTypeField, RexNode>() {
-              @Override
-              public RexNode apply(RelDataTypeField input) {
-                return new RexInputRef(input.getIndex(), input.getType());
-              }
-            });
-        RowResolver obSyntheticProjectRR = new RowResolver();
-        if (!RowResolver.add(obSyntheticProjectRR, inputRR)) {
-          throw new CalciteSemanticException(
-              "Duplicates detected when adding columns to RR: see previous message",
-              UnsupportedFeature.Duplicates_in_RR);
-        }
-        int vcolPos = inputRR.getRowSchema().getSignature().size();
-        for (Pair<ASTNode, TypeInfo> astTypePair : vcASTTypePairs) {
-          obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo(
-              SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null,
-              false));
-          vcolPos++;
-        }
-        obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst),
-            obSyntheticProjectRR, srcRel);
-
-        if (outermostOB) {
-          if (!RowResolver.add(outputRR, inputRR)) {
-            throw new CalciteSemanticException(
-                "Duplicates detected when adding columns to RR: see previous message",
-                UnsupportedFeature.Duplicates_in_RR);
-          }
-
-        } else {
-          if (!RowResolver.add(outputRR, obSyntheticProjectRR)) {
-            throw new CalciteSemanticException(
-                "Duplicates detected when adding columns to RR: see previous message",
-                UnsupportedFeature.Duplicates_in_RR);
-          }
-        }
-      } else {
-        if (!RowResolver.add(outputRR, inputRR)) {
-          throw new CalciteSemanticException(
-              "Duplicates detected when adding columns to RR: see previous message",
-              UnsupportedFeature.Duplicates_in_RR);
-        }
-      }
-
-      // 4. Construct SortRel
-      RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
-      RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
-      sortRel = new HiveSortLimit(cluster, traitSet, obInputRel, canonizedCollation, null, null);
-
-      // 5. Update the maps
-      // NOTE: Output RR for SortRel is considered same as its input; we may
-      // end up not using VC that is present in sort rel. Also note that
-      // rowtype of sortrel is the type of it child; if child happens to be
-      // synthetic project that we introduced then that projectrel would
-      // contain the vc.
-      ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
-      relToHiveRR.put(sortRel, outputRR);
-      relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
-
-      if (selectOutputRR != null) {
-        List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
-            new Function<RelDataTypeField, RexNode>() {
-              @Override
-              public RexNode apply(RelDataTypeField input) {
-                return new RexInputRef(input.getIndex(), input.getType());
-              }
-            });
-        List<RexNode> selectedRefs = Lists.newArrayList();
-        for (int index = 0; index < selectOutputRR.getColumnInfos().size(); index++) {
-          selectedRefs.add(originalInputRefs.get(index));
-        }
-        // We need to add select since order by schema may have more columns than result schema.
-        returnRel = genSelectRelNode(selectedRefs, selectOutputRR, sortRel);
-      } else {
-        returnRel = sortRel;
-      }
-    }
-    return returnRel;
-  }
+    // Fetch the clause this analysis runs on: SORT BY when called from
+    // genSBLogicalPlan, ORDER BY when called from genOBLogicalPlan.
+    ASTNode sbAST = sortBy ? qbp.getSortByForClause(dest) : qbp.getOrderByForClause(dest);
+
+    if (sbAST == null) {
+      return null;
+    }
+
+    // 1. OB Expr sanity test
+    // in strict mode, in the presence of order by, limit must be
+    // specified
+    Integer limit = qb.getParseInfo().getDestLimit(dest);
+    if (limit == null) {
+      String error = StrictChecks.checkNoLimit(conf);
+      if (error != null) {
+        throw new SemanticException(SemanticAnalyzer.generateErrorMessage(sbAST, error));
+      }
+    }
+
+    // 2. Walk through OB exprs and extract field collations and additional
+    // virtual columns needed
+    final List<RexNode> newVCLst = new ArrayList<RexNode>();
+    final List<RelFieldCollation> fieldCollations = Lists.newArrayList();
+    int fieldIndex = 0;
+
+    List<Node> sbASTExprLst = sbAST.getChildren();
+    ASTNode sbASTExpr;
+    ASTNode nullSbASTExpr;
+    List<Pair<ASTNode, TypeInfo>> vcASTTypePairs = new ArrayList<Pair<ASTNode, TypeInfo>>();
+    RowResolver inputRR = relToHiveRR.get(srcRel);
+    RowResolver outputRR = new RowResolver();
+
+    RexNode rnd;
+    RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(),
+        relToHiveColNameCalcitePosMap.get(srcRel), 0, false);
+    int srcRelRecordSz = srcRel.getRowType().getFieldCount();
+
+    List<Integer> joinKeyPositions = new ArrayList<>();
+    ImmutableList.Builder<RexNode> joinExprsBuilder = new ImmutableList.Builder<>();
+
+    for (int i = 0; i < sbASTExprLst.size(); i++) {
+      // 2.1 Convert AST Expr to ExprNode
+      sbASTExpr = (ASTNode) sbASTExprLst.get(i);
+      nullSbASTExpr = (ASTNode) sbASTExpr.getChild(0);
+      ASTNode ref = (ASTNode) nullSbASTExpr.getChild(0);
+      Map<ASTNode, ExprNodeDesc> astToExprNDescMap = null;
+      ExprNodeDesc obExprNDesc = null;
+
+      boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS);
+      boolean isObyByPos = isBothByPos
+          || HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS);
+      // replace each of the position alias in ORDERBY with the actual column
+      if (ref != null && ref.getToken().getType() == HiveParser.Number) {
+        if (isObyByPos) {
+          int pos = Integer.parseInt(ref.getText());
+          if (pos > 0 && pos <= selectOutputRR.getColumnInfos().size()) {
+            // fieldIndex becomes so simple
+            // Note that pos starts from 1 while fieldIndex starts from 0;
+            fieldIndex = pos - 1;
+          } else {
+            throw new SemanticException(
+                ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg("Position alias: " + pos
+                    + " does not exist\n" + "The Select List is indexed from 1 to "
+                    + selectOutputRR.getColumnInfos().size()));
+          }
+        } else { // if not using position alias and it is a number.
+          LOG.warn("Using constant number "
+              + ref.getText()
+              + " in order by. If you try to use position alias when hive.orderby.position.alias is false, the position alias will be ignored.");
+        }
+      } else {
+        // first try to get it from select
+        // in case of udtf, selectOutputRR may be null.
+        if (selectOutputRR != null) {
+          try {
+            astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR);
+            obExprNDesc = astToExprNDescMap.get(ref);
+          } catch (SemanticException ex) {
+            // we can tolerate this as this is the previous behavior
+            LOG.debug("Can not find column in " + ref.getText() + ". The error msg is "
+                + ex.getMessage());
+          }
+        }
+        // then try to get it from all
+        if (obExprNDesc == null) {
+          astToExprNDescMap = genAllExprNodeDesc(ref, inputRR);
+          obExprNDesc = astToExprNDescMap.get(ref);
+        }
+        if (obExprNDesc == null) {
+          throw new SemanticException("Invalid order by expression: " + sbASTExpr.toString());
+        }
+        // 2.2 Convert ExprNode to RexNode
+        rnd = converter.convert(obExprNDesc);
+
+        // 2.3 Determine the index of ob expr in child schema
+        // NOTE: Calcite can not take compound exprs in OB without it being
+        // present in the child (& hence we add a child Project Rel)
+        if (rnd instanceof RexInputRef) {
+          fieldIndex = ((RexInputRef) rnd).getIndex();
+        } else {
+          fieldIndex = srcRelRecordSz + newVCLst.size();
+          newVCLst.add(rnd);
+          vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>(ref, obExprNDesc.getTypeInfo()));
+        }
+        joinExprsBuilder.add(rnd);
+      }
+
+      // 2.4 Determine the Direction of order by
+      RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING;
+      if (sbASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
+        order = RelFieldCollation.Direction.ASCENDING;
+      }
+      RelFieldCollation.NullDirection nullOrder;
+      if (nullSbASTExpr.getType() == HiveParser.TOK_NULLS_FIRST) {
+        nullOrder = RelFieldCollation.NullDirection.FIRST;
+      } else if (nullSbASTExpr.getType() == HiveParser.TOK_NULLS_LAST) {
+        nullOrder = RelFieldCollation.NullDirection.LAST;
+      } else {
+        throw new SemanticException("Unexpected null ordering option: "
+            + nullSbASTExpr.getType());
+      }
+
+      // 2.5 Add to field collations
+      fieldCollations.add(new RelFieldCollation(fieldIndex, order, nullOrder));
+      joinKeyPositions.add(fieldIndex);
+    }
+
+    // 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel
+    // for top constraining Sel
+    RelNode obInputRel = srcRel;
+    if (!newVCLst.isEmpty()) {
+      List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
+          new Function<RelDataTypeField, RexNode>() {
+            @Override
+            public RexNode apply(RelDataTypeField input) {
+              return new RexInputRef(input.getIndex(), input.getType());
+            }
+          });
+      RowResolver obSyntheticProjectRR = new RowResolver();
+      if (!RowResolver.add(obSyntheticProjectRR, inputRR)) {
+        throw new CalciteSemanticException(
+            "Duplicates detected when adding columns to RR: see previous message",
+            UnsupportedFeature.Duplicates_in_RR);
+      }
+      int vcolPos = inputRR.getRowSchema().getSignature().size();
+      for (Pair<ASTNode, TypeInfo> astTypePair : vcASTTypePairs) {
+        obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo(
+            SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null,
+            false));
+        vcolPos++;
+      }
+      obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst),
+          obSyntheticProjectRR, srcRel);
+
+      if (outermostOB) {
+        if (!RowResolver.add(outputRR, inputRR)) {
+          throw new CalciteSemanticException(
+              "Duplicates detected when adding columns to RR: see previous message",
+              UnsupportedFeature.Duplicates_in_RR);
+        }
+
+      } else {
+        if (!RowResolver.add(outputRR, obSyntheticProjectRR)) {
+          throw new CalciteSemanticException(
+              "Duplicates detected when adding columns to RR: see previous message",
+              UnsupportedFeature.Duplicates_in_RR);
+        }
+      }
+    } else {
+      if (!RowResolver.add(outputRR, inputRR)) {
+        throw new CalciteSemanticException(
+            "Duplicates detected when adding columns to RR: see previous message",
+            UnsupportedFeature.Duplicates_in_RR);
+      }
+    }
+
+    return new OBLogicalPlan(obInputRel, fieldCollations, selectOutputRR, outputRR, srcRel);
+  }
+
+  private RelNode endGenOBLogicalPlan(OBLogicalPlan obLogicalPlan, RelNode sortRel) throws CalciteSemanticException {
+    // 5. Update the maps
+    // NOTE: Output RR for SortRel is considered same as its input; we may
+    // end up not using VC that is present in sort rel. Also note that
+    // rowtype of sortrel is the type of it child; if child happens to be
+    // synthetic project that we introduced then that projectrel would
+    // contain the vc.
+    ImmutableMap<String, Integer> hiveColNameCalcitePosMap =
+        buildHiveToCalciteColumnMap(obLogicalPlan.getOutputRR());
+    relToHiveRR.put(sortRel, obLogicalPlan.getOutputRR());
+    relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
+
+    if (obLogicalPlan.getSelectOutputRR() != null) {
+      List<RexNode> originalInputRefs = Lists.transform(obLogicalPlan.getSrcRel().getRowType().getFieldList(),
+          new Function<RelDataTypeField, RexNode>() {
+            @Override
+            public RexNode apply(RelDataTypeField input) {
+              return new RexInputRef(input.getIndex(), input.getType());
+            }
+          });
+      List<RexNode> selectedRefs = Lists.newArrayList();
+      for (int index = 0; index < obLogicalPlan.getSelectOutputRR().getColumnInfos().size(); index++) {
+        selectedRefs.add(originalInputRefs.get(index));
+      }
+      // We need to add select since order by schema may have more columns than result schema.
+      return genSelectRelNode(selectedRefs, obLogicalPlan.getSelectOutputRR(), sortRel);
+    } else {
+      return sortRel;
+    }
+  }
 
   private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
@@ -4722,6 +4992,7 @@ private void setQueryHints(QB qb) throws SemanticException {
     } else {
       String dest = qbp.getClauseNames().iterator().next();
       ASTNode obAST = qbp.getOrderByForClause(dest);
+      ASTNode sbAST = qbp.getSortByForClause(dest);
 
       RowResolver originalRR = null;
       // We only support limited unselected column following by order by.
@@ -4732,7 +5003,7 @@
       // If DISTINCT is present, it is not possible to ORDER BY unselected
       // columns, and in fact adding all columns would change the behavior of
       // DISTINCT, so we bypass this logic.
-      if (obAST != null
+      if ((obAST != null || sbAST != null)
           && selExprList.getToken().getType() != HiveParser.TOK_SELECTDI
           && !isAllColRefRewrite) {
         // 1. OB Expr sanity test
@@ -4957,6 +5228,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
     RelNode gbHavingRel = null;
     RelNode selectRel = null;
    RelNode obRel = null;
+    RelNode sbRel = null;
     RelNode limitRel = null;
 
     // First generate all the opInfos for the elements in the from clause
@@ -5048,6 +5320,10 @@
       obRel = genOBLogicalPlan(qb, selPair, outerMostQB);
       srcRel = (obRel == null) ? srcRel : obRel;
 
+      // 6b. Build Rel for SB Clause
+      sbRel = genSBLogicalPlan(qb, selPair, outerMostQB);
+      srcRel = (sbRel == null) ? srcRel : sbRel;
+
       // 7. Build Rel for Limit Clause
       limitRel = genLimitLogicalPlan(qb, srcRel);
       srcRel = (limitRel == null) ? srcRel : limitRel;
@@ -5195,6 +5471,43 @@ private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
     }
   }
 
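+  /**
+   * Intermediate state shared by genOBLogicalPlan and genSBLogicalPlan: the
+   * (possibly project-wrapped) sort input, the extracted field collations, and
+   * the row resolvers needed to finish the plan in endGenOBLogicalPlan.
+   */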
+  private static class OBLogicalPlan {
+    private final RelNode obInputRel;
+    private final List<RelFieldCollation> canonizedCollation;
+    private final RowResolver selectOutputRR;
+    private final RowResolver outputRR;
+    private final RelNode srcRel;
+
+    private OBLogicalPlan(RelNode obInputRel, List<RelFieldCollation> canonizedCollation,
+        RowResolver selectOutputRR, RowResolver outputRR, RelNode srcRel) {
+      this.obInputRel = obInputRel;
+      this.canonizedCollation = canonizedCollation;
+      this.selectOutputRR = selectOutputRR;
+      this.outputRR = outputRR;
+      this.srcRel = srcRel;
+    }
+
+    public RelNode getObInputRel() {
+      return obInputRel;
+    }
+
+    public List<RelFieldCollation> getCanonizedCollation() {
+      return canonizedCollation;
+    }
+
+    public RowResolver getSelectOutputRR() {
+      return selectOutputRR;
+    }
+
+    public RowResolver getOutputRR() {
+      return outputRR;
+    }
+
+    public RelNode getSrcRel() {
+      return srcRel;
+    }
+  }
+
   @Override
   protected Table getTableObjectByName(String tabName, boolean throwException) throws HiveException {
     String[] names = Utilities.getDbTableName(tabName);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
index 31068cb8c3..7b25030442 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
@@ -52,7 +52,7 @@
  * statements (since they are actually inserts) and then doing some patch up to make them work as
  * updates and deletes instead.
  */
-public abstract class RewriteSemanticAnalyzer extends SemanticAnalyzer {
+public abstract class RewriteSemanticAnalyzer extends CalcitePlanner {
   protected static final Logger LOG = LoggerFactory.getLogger(RewriteSemanticAnalyzer.class);
 
   protected boolean useSuper = false;
diff --git ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q
index be50b69830..f228ccd609 100644
--- ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q
+++ ql/src/test/queries/clientpositive/authorization_view_disable_cbo_1.q
@@ -64,7 +64,7 @@
 set hive.cbo.enable=true;
 
 --although cbo is enabled, it will not succeed.
-select key from v_n10 sort by key limit 10;
+select key from v_n10 cluster by key limit 10;
 
 select key from (select key as key from src_autho_test_n9 union all select key from v_n10 cluster by key)subq
diff --git ql/src/test/queries/clientpositive/sort.q ql/src/test/queries/clientpositive/sort.q
index cab2712810..93b2faf8bd 100644
--- ql/src/test/queries/clientpositive/sort.q
+++ ql/src/test/queries/clientpositive/sort.q
@@ -1,7 +1,11 @@
 --! qt:dataset:src
 -- SORT_QUERY_RESULTS
 
-EXPLAIN
-SELECT x.* FROM SRC x SORT BY key;
+SELECT x.key, x.value FROM SRC x ORDER BY 1;
+
+SELECT x.key, x.value FROM SRC x SORT BY 1;
+
+--EXPLAIN
+--SELECT x.* FROM SRC x SORT BY key;
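+-- NOTE: SORT BY alone gives no total order across reducers; the
+-- SORT_QUERY_RESULTS directive above keeps the comparison order-insensitive.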
 
 SELECT x.* FROM SRC x SORT BY key;
diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index b82a055079..5331200522 100644
--- ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -271,7 +271,7 @@ POSTHOOK: query: drop table src_orc_merge_test_part_n1
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@src_orc_merge_test_part_n1
 POSTHOOK: Output: default@src_orc_merge_test_part_n1
-Warning: Shuffle Join MERGEJOIN[18][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from (
 select src1.key as k1, src1.value as v1,
@@ -296,7 +296,7 @@ select src1.key as k1, src1.value as v1,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
-Plan not optimized by CBO because the statement has sort by
+Plan optimized by CBO.
 
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
    Stage-1
      Reducer 3 vectorized, llap
      File Output Operator [FS_21]
        Select Operator [SEL_20] (rows=1 width=8)
          Output:["_col0"]
        <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
          PARTITION_ONLY_SHUFFLE [RS_12]
            Group By Operator [GBY_11] (rows=1 width=8)
-              Output:["_col0"],aggregations:["sum(hash(_col0,_col1,_col2,_col3))"]
-              Merge Join Operator [MERGEJOIN_18] (rows=27556 width=356)
-                Conds:(Inner),Output:["_col0","_col1","_col2","_col3"]
-              <-Map 1 [XPROD_EDGE] llap
-                XPROD_EDGE [RS_6]
-                  Select Operator [SEL_2] (rows=166 width=178)
-                    Output:["_col0","_col1"]
-                    Filter Operator [FIL_16] (rows=166 width=178)
-                      predicate:(key < 10)
-                      TableScan [TS_0] (rows=500 width=178)
-                        default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-              <-Map 4 [XPROD_EDGE] llap
-                XPROD_EDGE [RS_7]
-                  Select Operator [SEL_5] (rows=166 width=178)
-                    Output:["_col0","_col1"]
-                    Filter Operator [FIL_17] (rows=166 width=178)
-                      predicate:(key < 10)
-                      TableScan [TS_3] (rows=500 width=178)
-                        default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+              Output:["_col0"],aggregations:["sum(_col0)"]
+              Select Operator [SEL_9] (rows=27556 width=356)
+                Output:["_col0"]
+                Merge Join Operator [MERGEJOIN_18] (rows=27556 width=356)
+                  Conds:(Inner),Output:["_col0","_col1","_col2","_col3"]
+                <-Map 1 [XPROD_EDGE] llap
+                  XPROD_EDGE [RS_6]
+                    Select Operator [SEL_2] (rows=166 width=178)
+                      Output:["_col0","_col1"]
+                      Filter Operator [FIL_16] (rows=166 width=178)
+                        predicate:(UDFToDouble(key) < 10.0D)
+                        TableScan [TS_0] (rows=500 width=178)
+                          default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                <-Map 4 [XPROD_EDGE] llap
+                  XPROD_EDGE [RS_7]
+                    Select Operator [SEL_5] (rows=166 width=178)
+                      Output:["_col0","_col1"]
+                      Filter Operator [FIL_17] (rows=166 width=178)
+                        predicate:(UDFToDouble(key) < 10.0D)
+                        TableScan [TS_3] (rows=500 width=178)
+                          default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key
 PREHOOK: type: QUERY
@@ -3716,11 +3718,11 @@
 POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: default@src
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@nzhang_CTAS1_n1
-Plan not optimized by CBO because the statement has sort by
+Plan optimized by CBO.
 
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 
 Stage-3
@@ -3731,36 +3733,34 @@
       Move Operator
         Stage-1
           Reducer 4 llap
-          File Output Operator [FS_17]
-            Group By Operator [GBY_15] (rows=1 width=880)
+          File Output Operator [FS_18]
+            Group By Operator [GBY_16] (rows=1 width=880)
               Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"]
            <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap
-              File Output Operator [FS_8]
+              File Output Operator [FS_9]
                 table:{"name:":"default.nzhang_CTAS1_n1"}
-                Limit [LIM_7] (rows=10 width=178)
+                Limit [LIM_8] (rows=10 width=178)
                   Number of rows:10
-                  Select Operator [SEL_6] (rows=10 width=178)
+                  Select Operator [SEL_7] (rows=10 width=178)
                     Output:["_col0","_col1"]
-                  <-Reducer 2 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_5]
-                      Top N Key Operator [TNK_18] (rows=10 width=178)
-                        keys:_col0, _col1,top n:10
-                        Limit [LIM_4] (rows=10 width=178)
-                          Number of rows:10
-                          Select Operator [SEL_3] (rows=500 width=178)
-                            Output:["_col0","_col1"]
-                          <-Map 1 [SIMPLE_EDGE] llap
-                            SHUFFLE [RS_2]
-                              Select Operator [SEL_1] (rows=500 width=178)
-                                Output:["_col0","_col1"]
-                                Top N Key Operator [TNK_19] (rows=500 width=178)
-                                  keys:key, value,top n:10
-                                  TableScan [TS_0] (rows=500 width=178)
-                                    default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-              PARTITION_ONLY_SHUFFLE [RS_14]
-                Select Operator [SEL_13] (rows=10 width=178)
+                  <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+                    PARTITION_ONLY_SHUFFLE [RS_6]
+                      Limit [LIM_5] (rows=10 width=178)
+                        Number of rows:10
+                        Select Operator [SEL_3] (rows=500 width=178)
+                          Output:["_col0","_col1"]
+                        <-Map 1 [SIMPLE_EDGE] llap
+                          SHUFFLE [RS_2]
+                            Select Operator [SEL_1] (rows=500 width=178)
+                              Output:["_col0","_col1"]
+                              Top N Key Operator [TNK_19] (rows=500 width=178)
+                                keys:key, value,top n:10
+                                TableScan [TS_0] (rows=500 width=178)
+                                  default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+              PARTITION_ONLY_SHUFFLE [RS_15]
+                Select Operator [SEL_14] (rows=10 width=178)
                   Output:["col1","col2"]
-                  Please refer to the previous Limit [LIM_7]
+                  Please refer to the previous Limit [LIM_8]
         Stage-2
           Dependency Collection{}
             Please refer to the previous Stage-1
@@ -3787,11 +3787,11 @@
 POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: default@src
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@nzhang_ctas3_n1
-Plan not optimized by CBO because the statement has sort by
+Plan optimized by CBO.
 
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 
 Stage-3
@@ -3802,36 +3802,34 @@
       Move Operator
        Stage-1
          Reducer 4 llap
-          File Output Operator [FS_17]
-            Group By Operator [GBY_15] (rows=1 width=880)
+          File Output Operator [FS_18]
+            Group By Operator [GBY_16] (rows=1 width=880)
               Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"]
            <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap
-              File Output Operator [FS_8]
+              File Output Operator [FS_9]
                 table:{"name:":"default.nzhang_ctas3_n1"}
-                Limit [LIM_7] (rows=10 width=192)
+                Limit [LIM_8] (rows=10 width=192)
                   Number of rows:10
-                  Select Operator [SEL_6] (rows=10 width=192)
+                  Select Operator [SEL_7] (rows=10 width=192)
                     Output:["_col0","_col1"]
-                  <-Reducer 2 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_5]
-                      Top N Key Operator [TNK_18] (rows=10 width=192)
-                        keys:_col0, _col1,top n:10
-                        Limit [LIM_4] (rows=10 width=192)
-                          Number of rows:10
-                          Select Operator [SEL_3] (rows=500 width=192)
-                            Output:["_col0","_col1"]
-                          <-Map 1 [SIMPLE_EDGE] llap
-                            SHUFFLE [RS_2]
-                              Select Operator [SEL_1] (rows=500 width=192)
-                                Output:["_col0","_col1"]
-                                Top N Key Operator [TNK_19] (rows=500 width=178)
-                                  keys:(key / 2), concat(value, '_con'),top n:10
-                                  TableScan [TS_0] (rows=500 width=178)
-                                    default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-              PARTITION_ONLY_SHUFFLE [RS_14]
-                Select Operator [SEL_13] (rows=10 width=192)
+                  <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+                    PARTITION_ONLY_SHUFFLE [RS_6]
+                      Limit [LIM_5] (rows=10 width=192)
+                        Number of rows:10
+                        Select Operator [SEL_3] (rows=500 width=192)
+                          Output:["_col0","_col1"]
+                        <-Map 1 [SIMPLE_EDGE] llap
+                          SHUFFLE [RS_2]
+                            Select Operator [SEL_1] (rows=500 width=192)
+                              Output:["_col0","_col1"]
+                              Top N Key Operator [TNK_19] (rows=500 width=178)
+                                keys:(UDFToDouble(key) / 2.0D), concat(value, '_con'),top n:10
+                                TableScan [TS_0] (rows=500 width=178)
+                                  default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+              PARTITION_ONLY_SHUFFLE [RS_15]
+                Select Operator [SEL_14] (rows=10 width=192)
                   Output:["col1","col2"]
-                  Please refer to the previous Limit [LIM_7]
+                  Please refer to the previous Limit [LIM_8]
         Stage-2
          Dependency Collection{}
            Please refer to the previous Stage-1
@@ -3896,7 +3894,7 @@ select src1.key as k1, src1.value as v1,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
-Plan not optimized by CBO because the statement has sort by
+Plan optimized by CBO.
 
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
@@ -3919,7 +3917,7 @@ Stage-0
                   Select Operator [SEL_2] (rows=166 width=178)
                     Output:["_col0","_col1"]
                     Filter Operator [FIL_13] (rows=166 width=178)
-                      predicate:(key < 10)
+                      predicate:(UDFToDouble(key) < 10.0D)
                       TableScan [TS_0] (rows=500 width=178)
                         default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
               <-Map 4 [CUSTOM_SIMPLE_EDGE] llap
@@ -3927,7 +3925,7 @@ Stage-0
                   Select Operator [SEL_5] (rows=166 width=178)
                     Output:["_col0","_col1"]
                     Filter Operator [FIL_14] (rows=166 width=178)
-                      predicate:(key < 10)
+                      predicate:(UDFToDouble(key) < 10.0D)
                       TableScan [TS_3] (rows=500 width=178)
                         default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 2c9c7aa8ac..30a812cf65 100644
--- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -268,7 +268,6 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -319,50 +318,6 @@
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Select Vectorization:
-                    className: VectorSelectOperator
-                    native: true
-                    projectedOutputColumnNums: [2, 3, 1, 4, 5, 0]
-                Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 25
-                  Limit Vectorization:
-                      className: VectorLimitOperator
-                      native: true
-                  Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE
-                  Top N Key Operator
-                    sort order: ++
-                    keys: _col5 (type: int), _col2 (type: date)
-                    null sort order: zz
-                    Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE
-                    top n: 25
-                    Top N Key Vectorization:
-                        className: VectorTopNKeyOperator
-                        keyExpressions: col 0:int, col 1:date
-                        native: true
-                    Reduce Output Operator
-                      key expressions: _col5 (type: int), _col2 (type: date)
-                      null sort order: zz
-                      sort order: ++
-                      Reduce Sink Vectorization:
-                          className: VectorReduceSinkObjectHashOperator
-                          native: true
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE
-                      TopN Hash Memory Usage: 0.1
-                      value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float)
-        Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [2, 3, 1, 4, 5, 0]
-                Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 25
                  Limit Vectorization:
                      className: VectorLimitOperator
                      native: true
@@ -1259,7 +1214,6 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -1310,50 +1264,6 @@
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Select Vectorization:
-                    className: VectorSelectOperator
-                    native: true
-                    projectedOutputColumnNums: [2, 3, 4, 5, 0, 1]
-                Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 25
-                  Limit Vectorization:
-                      className: VectorLimitOperator
-                      native: true
-                  Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
-                  Top N Key Operator
-                    sort order: ++
-                    keys: _col4 (type: int), _col5 (type: date)
-                    null sort order: zz
-                    Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
-                    top n: 25
-                    Top N Key Vectorization:
-                        className: VectorTopNKeyOperator
-                        keyExpressions: col 0:int, col 1:date
-                        native: true
-                    Reduce Output Operator
-                      key expressions: _col4 (type: int), _col5 (type: date)
-                      null sort order: zz
-                      sort order: ++
-                      Reduce Sink Vectorization:
-                          className: VectorReduceSinkObjectHashOperator
-                          native: true
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
-                      TopN Hash Memory Usage: 0.1
-                      value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float)
-        Reducer 3
             Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [2, 3, 4, 5, 0, 1]
-                Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 25
                  Limit Vectorization:
@@ -2322,7 +2232,6 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -2373,50 +2282,6 @@
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Select Vectorization:
-                    className: VectorSelectOperator
-                    native: true
-                    projectedOutputColumnNums: [2, 3, 4, 5, 0, 1]
-                Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 25
-                  Limit Vectorization:
-                      className: VectorLimitOperator
-                      native: true
-                  Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
-                  Top N Key Operator
-                    sort order: ++
-                    keys: _col4 (type: int), _col5 (type: timestamp)
-                    null sort order: zz
-                    Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
-                    top n: 25
-                    Top N Key Vectorization:
-                        className: VectorTopNKeyOperator
-                        keyExpressions: col 0:int, col 1:timestamp
-                        native: true
-                    Reduce Output Operator
-                      key expressions: _col4 (type: int), _col5 (type: timestamp)
-                      null sort order: zz
-                      sort order: ++
-                      Reduce Sink Vectorization:
-                          className: VectorReduceSinkObjectHashOperator
-                          native: true
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
-                      TopN Hash Memory Usage: 0.1
-                      value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float)
-        Reducer 3
             Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [2, 3, 4, 5, 0, 1]
-                Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 25
                  Limit Vectorization:
@@ -2941,7 +2806,6 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -2992,50 +2856,6 @@
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Select Vectorization:
-                    className: VectorSelectOperator
-                    native: true
-                    projectedOutputColumnNums: [2, 3, 1, 4, 5, 0]
-                Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 25
-                  Limit Vectorization:
-                      className: VectorLimitOperator
-                      native: true
-                  Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE
-                  Top N Key Operator
-                    sort order: ++
-                    keys: _col5 (type: int), _col2 (type: date)
-                    null sort order: zz
-                    Statistics: Num rows: 25 Data size: 7200 Basic stats: 
COMPLETE Column stats: COMPLETE - top n: 25 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int, col 1:date - native: true - Reduce Output Operator - key expressions: _col5 (type: int), _col2 (type: date) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -3051,7 +2871,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 1, 4, 5, 0] - Statistics: Num rows: 25 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 39456 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: @@ -3932,7 +3752,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3983,50 +3802,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: timestamp), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: date) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 25 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col4 (type: int), _col5 (type: date) - null sort order: zz - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - top n: 25 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int, col 1:date - native: true - Reduce Output Operator - key expressions: _col4 (type: int), _col5 (type: date) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: 
float) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -4042,7 +3817,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: @@ -4995,7 +4770,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5046,50 +4820,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: date), VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 25 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col4 (type: int), _col5 (type: timestamp) - null sort order: zz - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - top n: 25 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int, col 1:timestamp - native: true - Reduce Output Operator - key expressions: _col4 (type: int), _col5 (type: timestamp) - null sort order: zz - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -5105,7 +4835,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization:
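Note on the six removal hunks above: they are all instances of the same plan change. Because SORT BY statements now go through CBO (see the earlier hunk replacing "Plan not optimized by CBO because the statement has sort by" with "Plan optimized by CBO."), the per-reducer sort and the limit are planned in a single vectorized reducer, so the intermediate Reducer 2 stage (sort, Top N Key, re-shuffle) disappears and the surviving Select Operator reports the pre-limit statistics (137 rows) instead of 25. For orientation only, a hypothetical HiveQL sketch of the query shape behind these plans, with a per-reducer SORT BY on an int column plus a date or timestamp column followed by LIMIT; the table and column names below are illustrative assumptions, not taken from this diff:

  -- Hypothetical query shape; table/column names are assumed for illustration.
  SELECT origin_city_name, dest_city_name, fl_date, fl_time, arr_delay, fl_num
  FROM flights_tiny_orc
  SORT BY fl_num, fl_date
  LIMIT 25;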