diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e9bf3e4..90166f1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -29,6 +29,7 @@
 import java.util.BitSet;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.Deque;
 import java.util.EnumSet;
 import java.util.HashMap;
@@ -2908,7 +2909,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
    * top constraining Select
    * @throws SemanticException
    */
-  private Pair<RelNode, RelNode> genOBLogicalPlan(QB qb, RelNode srcRel, boolean outermostOB)
+  private Pair<RelNode, RelNode> genOBLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel, boolean outermostOB)
       throws SemanticException {
     RelNode sortRel = null;
     RelNode originalOBChild = null;
@@ -2931,7 +2932,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
       // 2. Walk through OB exprs and extract field collations and additional
       // virtual columns needed
       final List<RexNode> newVCLst = new ArrayList<RexNode>();
-      final List<RelFieldCollation> fieldCollations = Lists.newArrayList();
+      final List<RelFieldCollationWrapper> fieldCollations = Lists.newArrayList();
       int fieldIndex = 0;
 
       List<Node> obASTExprLst = obAST.getChildren();
@@ -2940,12 +2941,16 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
       List<Pair<ASTNode, TypeInfo>> vcASTTypePairs = new ArrayList<Pair<ASTNode, TypeInfo>>();
       RowResolver inputRR = relToHiveRR.get(srcRel);
       RowResolver outputRR = new RowResolver();
-
+      RowResolver starSrcRR = relToHiveRR.get(starSrcRel);
+
       RexNode rnd;
-      RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(),
-          relToHiveColNameCalcitePosMap.get(srcRel), 0, false);
+      RexNodeConverter converter = null;
+      RelNode input = null;
       int srcRelRecordSz = srcRel.getRowType().getFieldCount();
+      List<RexNode> needToAddRexNode = new ArrayList<>();
+      List<Pair<ASTNode, TypeInfo>> needToAddvcASTTypePairs = new ArrayList<Pair<ASTNode, TypeInfo>>();
+
 
       for (int i = 0; i < obASTExprLst.size(); i++) {
         // 2.1 Convert AST Expr to ExprNode
         obASTExpr = (ASTNode) obASTExprLst.get(i);
@@ -2954,21 +2959,50 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
         Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode(
             obASTExpr, new TypeCheckCtx(inputRR));
         ExprNodeDesc obExprNDesc = astToExprNDescMap.get(ref);
-        if (obExprNDesc == null)
-          throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
+        boolean isFromStar = false;
+        if (obExprNDesc == null) {
+          Map<ASTNode, ExprNodeDesc> alternativeAstToExprNDescMap = TypeCheckProcFactory
+              .genExprNode(obASTExpr, new TypeCheckCtx(starSrcRR));
+          obExprNDesc = alternativeAstToExprNDescMap.get(ref);
+          if (obExprNDesc == null) {
+            throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
+          }
+          isFromStar = true;
+          if (srcRel instanceof HiveProject) {
+            // there should be only one input for select
+            input = srcRel.getInput(0);
+            converter = new RexNodeConverter(cluster, input.getRowType(),
+                relToHiveColNameCalcitePosMap.get(input), 0, false);
+          }
+        }
+        else {
+          converter = new RexNodeConverter(cluster, srcRel.getRowType(),
+              relToHiveColNameCalcitePosMap.get(srcRel), 0, false);
+        }
 
         // 2.2 Convert ExprNode to RexNode
         rnd = converter.convert(obExprNDesc);
-
+        if (isFromStar) {
+          needToAddRexNode.add(rnd);
+        }
         // 2.3 Determine the index of ob expr in child schema
         // NOTE: Calcite can not take compound exprs in OB without it being
         // present in the child (& hence we add a child Project Rel)
-        if (rnd instanceof RexInputRef) {
-          fieldIndex = ((RexInputRef) rnd).getIndex();
+        int type = 0;
+        if (isFromStar) {
+          fieldIndex = srcRelRecordSz + needToAddvcASTTypePairs.size() + newVCLst.size();
+          needToAddvcASTTypePairs
+              .add(new Pair<ASTNode, TypeInfo>(ref, obExprNDesc.getTypeInfo()));
+          type = 1;
         } else {
-          fieldIndex = srcRelRecordSz + newVCLst.size();
-          newVCLst.add(rnd);
-          vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>(ref, obExprNDesc.getTypeInfo()));
+          if (rnd instanceof RexInputRef) {
+            fieldIndex = ((RexInputRef) rnd).getIndex();
+          } else {
+            fieldIndex = srcRelRecordSz + needToAddvcASTTypePairs.size() + newVCLst.size();
+            newVCLst.add(rnd);
+            vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>(ref, obExprNDesc.getTypeInfo()));
+            type = 2;
+          }
         }
 
         // 2.4 Determine the Direction of order by
@@ -2987,27 +3021,42 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
         }
 
         // 2.5 Add to field collations
-        fieldCollations.add(new RelFieldCollation(fieldIndex, order, nullOrder));
+        fieldCollations.add(new RelFieldCollationWrapper(new RelFieldCollation(fieldIndex, order,
+            nullOrder), type, fieldIndex));
+      }
+
+      // replace the srcRel, add those needed to add
+      RowResolver selSyntheticProjectRR = inputRR.duplicate();
+      RelNode selSyntheticRel = srcRel;
+      if (!needToAddRexNode.isEmpty()) {
+        // first rearrange fieldCollations
+        // the ordering is like this
+        // --RexInputRef--isFromStar--newVCLst--
+        List<RexNode> originalInputRefs = ((HiveProject) srcRel).getChildExps();
+        int vcolPos = inputRR.getRowSchema().getSignature().size();
+        for (Pair<ASTNode, TypeInfo> astTypePair : needToAddvcASTTypePairs) {
+          selSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo(
+              SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null,
+              false));
+          vcolPos++;
+        }
+        selSyntheticRel = genSelectRelNode(CompositeList.of(originalInputRefs, needToAddRexNode),
+            selSyntheticProjectRR, input);
       }
 
       // 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel
       // for top constraining Sel
-      RelNode obInputRel = srcRel;
+      RelNode obInputRel = selSyntheticRel;
       if (!newVCLst.isEmpty()) {
-        List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
+        List<RexNode> originalInputRefs = Lists.transform(selSyntheticRel.getRowType().getFieldList(),
             new Function<RelDataTypeField, RexNode>() {
              @Override
              public RexNode apply(RelDataTypeField input) {
                return new RexInputRef(input.getIndex(), input.getType());
              }
            });
-        RowResolver obSyntheticProjectRR = new RowResolver();
-        if (!RowResolver.add(obSyntheticProjectRR, inputRR)) {
-          throw new CalciteSemanticException(
-              "Duplicates detected when adding columns to RR: see previous message",
-              UnsupportedFeature.Duplicates_in_RR);
-        }
-        int vcolPos = inputRR.getRowSchema().getSignature().size();
+        RowResolver obSyntheticProjectRR = selSyntheticProjectRR.duplicate();
+        int vcolPos = selSyntheticProjectRR.getRowSchema().getSignature().size();
         for (Pair<ASTNode, TypeInfo> astTypePair : vcASTTypePairs) {
           obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo(
               SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null,
@@ -3015,34 +3064,32 @@ public RexNode apply(RelDataTypeField input) {
           vcolPos++;
         }
         obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst),
-            obSyntheticProjectRR, srcRel);
-
-        if (outermostOB) {
-          if (!RowResolver.add(outputRR, inputRR)) {
-            throw new CalciteSemanticException(
-                "Duplicates detected when adding columns to RR: see previous message",
-                UnsupportedFeature.Duplicates_in_RR);
-          }
-
-        } else {
-          if (!RowResolver.add(outputRR, obSyntheticProjectRR)) {
-            throw new CalciteSemanticException(
-                "Duplicates detected when adding columns to RR: see previous message",
-                UnsupportedFeature.Duplicates_in_RR);
-          }
-          originalOBChild = srcRel;
-        }
-      } else {
-        if (!RowResolver.add(outputRR, inputRR)) {
-          throw new CalciteSemanticException(
-              "Duplicates detected when adding columns to RR: see previous message",
-              UnsupportedFeature.Duplicates_in_RR);
+            obSyntheticProjectRR, selSyntheticRel);
+        if (!outermostOB) {
+          originalOBChild = selSyntheticRel;
         }
-      }
+      }
 
       // 4. Construct SortRel
+      final List<RelFieldCollation> collations = Lists.newArrayList();
+      if (!needToAddRexNode.isEmpty() || !newVCLst.isEmpty()) {
+        fieldCollations.sort(new Comparator<RelFieldCollationWrapper>() {
+          @Override
+          public int compare(RelFieldCollationWrapper o1, RelFieldCollationWrapper o2) {
+            if (o1.type != o2.type) {
+              return o1.type - o2.type;
+            } else {
+              return o1.order - o2.order;
+            }
+          }
+        });
+      }
+      for (RelFieldCollationWrapper c : fieldCollations) {
+        collations.add(c.collation);
+      }
+
       RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
-      RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
+      RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(collations));
       sortRel = new HiveSortLimit(cluster, traitSet, obInputRel, canonizedCollation, null, null);
 
       // 5. Update the maps
@@ -3051,15 +3098,45 @@ public RexNode apply(RelDataTypeField input) {
       // rowtype of sortrel is the type of it child; if child happens to be
       // synthetic project that we introduced then that projectrel would
       // contain the vc.
+      outputRR = inputRR.duplicate();
       ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(
           outputRR, sortRel);
       relToHiveRR.put(sortRel, outputRR);
       relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
+
+      if (!needToAddRexNode.isEmpty() || !newVCLst.isEmpty()) {
+        List<RexNode> originalInputRefs = Lists.transform(obInputRel.getRowType().getFieldList(),
+            new Function<RelDataTypeField, RexNode>() {
+              @Override
+              public RexNode apply(RelDataTypeField input) {
+                return new RexInputRef(input.getIndex(), input.getType());
+              }
+            });
+        List<RexNode> selectedRefs = Lists.newArrayList();
+        for (int index = 0; index < originalInputRefs.size() - needToAddRexNode.size()
+            - newVCLst.size(); index++) {
+          selectedRefs.add(originalInputRefs.get(index));
+        }
+        sortRel = genSelectRelNode(selectedRefs, inputRR.duplicate(), sortRel);
+      }
     }
 
     return (new Pair<RelNode, RelNode>(sortRel, originalOBChild));
   }
 
+  class RelFieldCollationWrapper {
+    RelFieldCollation collation;
+    int type;
+    int order;
+
+    public RelFieldCollationWrapper(RelFieldCollation relFieldCollation, int i, int j) {
+      collation = relFieldCollation;
+      type = i;
+      order = j;
+    }
+
+  }
+
   private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
     HiveRelNode sortRel = null;
     QBParseInfo qbp = getQBParseInfo(qb);
@@ -3856,7 +3933,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
       srcRel = (selectRel == null) ? srcRel : selectRel;
 
       // 6. Build Rel for OB Clause
-      Pair<RelNode, RelNode> obTopProjPair = genOBLogicalPlan(qb, srcRel, outerMostQB);
+      Pair<RelNode, RelNode> obTopProjPair = genOBLogicalPlan(qb, srcRel, starSrcRel, outerMostQB);
       obRel = obTopProjPair.getKey();
       RelNode topConstrainingProjArgsRel = obTopProjPair.getValue();
       srcRel = (obRel == null) ? srcRel : obRel;
diff --git a/ql/src/test/queries/clientpositive/order_by_expr.q b/ql/src/test/queries/clientpositive/order_by_expr.q
new file mode 100644
index 0000000..f213a04
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/order_by_expr.q
@@ -0,0 +1,20 @@
+create table t(a int, b int);
+
+insert into t values (1,2),(1,2),(1,3),(2,4),(20,-100),(-1000,100),(4,5),(3,7),(8,9);
+
+select a from t order by b;
+select a from t order by 0-b;
+select b from t order by 0-b;
+select b from t order by a, 0-b;
+select b from t order by a+1, 0-b;
+select b from t order by 0-b, a+1;
+explain select b from t order by 0-b, a+1;
+select a,b from t order by 0-b;
+select a,b from t order by a, a+1, 0-b;
+select a,b from t order by 0-b, a+1;
+select a+1,b from t order by a, a+1, 0-b;
+select a+1 as c, b from t order by a, a+1, 0-b;
+select a, a+1 as c, b from t order by a, a+1, 0-b;
+select a, a+1 as c, b, 2*b from t order by a, a+1, 0-b;
+explain select a, a+1 as c, b, 2*b from t order by a, a+1, 0-b;
+select a, a+1 as c, b, 2*b from t order by c+1, 0-b;
diff --git a/ql/src/test/results/clientpositive/order_by_expr.q.out b/ql/src/test/results/clientpositive/order_by_expr.q.out
new file mode 100644
index 0000000..6a0a014
--- /dev/null
+++ b/ql/src/test/results/clientpositive/order_by_expr.q.out
@@ -0,0 +1,340 @@
+PREHOOK: query: create table t(a int, b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t(a int, b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values (1,2),(1,2),(1,3),(2,4),(20,-100),(-1000,100),(4,5),(3,7),(8,9)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (1,2),(1,2),(1,3),(2,4),(20,-100),(-1000,100),(4,5),(3,7),(8,9)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select a from t order by b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a from t order by b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+20
+1
+1
+1
+2
+4
+3
+8
+-1000
+PREHOOK: query: select a from t order by 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a from t order by 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000
+8
+3
+4
+2
+1
+1
+1
+20
+PREHOOK: query: select b from t order by 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select b from t order by 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+100
+9
+7
+5
+4
+3
+2
+2
+-100
+PREHOOK: query: select b from t order by a, 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select b from t order by a, 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+100
+3
+2
+2
+4
+7
+5
+9
+-100
+PREHOOK: query: select b from t order by a+1, 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select b from t order by a+1, 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+100
+3
+2
+2
+4
+7
+5
+9
+-100
+PREHOOK: query: select b from t order by 0-b, a+1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select b from t order by 0-b, a+1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+100
+9
+7
+5
+4
+3
+2
+2
+-100
+PREHOOK: query: explain select b from t order by 0-b, a+1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select b from t order by 0-b, a+1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t
+            Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: b (type: int), (a + 1) (type: int), (0 - b) (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col2 (type: int), _col1 (type: int)
+                sort order: ++
+                Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int)
+          outputColumnNames: _col0
+          Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a,b from t order by 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b from t order by 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000	100
+8	9
+3	7
+4	5
+2	4
+1	3
+1	2
+1	2
+20	-100
+PREHOOK: query: select a,b from t order by a, a+1, 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b from t order by a, a+1, 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000	100
+1	3
+1	2
+1	2
+2	4
+3	7
+4	5
+8	9
+20	-100
+PREHOOK: query: select a,b from t order by 0-b, a+1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b from t order by 0-b, a+1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000	100
+8	9
+3	7
+4	5
+2	4
+1	3
+1	2
+1	2
+20	-100
+PREHOOK: query: select a+1,b from t order by a, a+1, 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a+1,b from t order by a, a+1, 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-999	100
+2	3
+2	2
+2	2
+3	4
+4	7
+5	5
+9	9
+21	-100
+PREHOOK: query: select a+1 as c, b from t order by a, a+1, 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a+1 as c, b from t order by a, a+1, 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-999	100
+2	3
+2	2
+2	2
+3	4
+4	7
+5	5
+9	9
+21	-100
+PREHOOK: query: select a, a+1 as c, b from t order by a, a+1, 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a, a+1 as c, b from t order by a, a+1, 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000	-999	100
+1	2	3
+1	2	2
+1	2	2
+2	3	4
+3	4	7
+4	5	5
+8	9	9
+20	21	-100
+PREHOOK: query: select a, a+1 as c, b, 2*b from t order by a, a+1, 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a, a+1 as c, b, 2*b from t order by a, a+1, 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000	-999	100	200
+1	2	3	6
+1	2	2	4
+1	2	2	4
+2	3	4	8
+3	4	7	14
+4	5	5	10
+8	9	9	18
+20	21	-100	-200
+PREHOOK: query: explain select a, a+1 as c, b, 2*b from t order by a, a+1, 0-b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a, a+1 as c, b, 2*b from t order by a, a+1, 0-b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t
+            Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int), (2 * b) (type: int), (a + 1) (type: int), (0 - b) (type: int)
+              outputColumnNames: _col0, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: int), _col4 (type: int), _col5 (type: int)
+                sort order: +++
+                Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col2 (type: int), _col3 (type: int)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a, a+1 as c, b, 2*b from t order by c+1, 0-b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a, a+1 as c, b, 2*b from t order by c+1, 0-b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000	-999	100	200
+1	2	3	6
+1	2	2	4
+1	2	2	4
+2	3	4	8
+3	4	7	14
+4	5	5	10
+8	9	9	18
+20	21	-100	-200