diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java index efde665..68b2a10 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java @@ -17,8 +17,11 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import java.math.BigDecimal; + import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; @@ -65,7 +68,7 @@ public boolean matches(RelOptRuleCall call) { // If the bottom operator is not synthetic and it does not contain a limit, // we will bail out; we do not want to end up with limits all over the tree if (topSortLimit.isRuleCreated() && !bottomSortLimit.isRuleCreated() && - bottomSortLimit.fetch == null) { + !HiveCalciteUtil.limitRelNode(bottomSortLimit)) { return false; } @@ -77,17 +80,38 @@ public void onMatch(RelOptRuleCall call) { final HiveSortLimit topSortLimit = call.rel(0); final HiveSortLimit bottomSortLimit = call.rel(1); - // Lowest limit + final RexNode newOffset; final RexNode newLimit; - if (bottomSortLimit.fetch != null && RexLiteral.intValue(topSortLimit.fetch) - >= RexLiteral.intValue(bottomSortLimit.fetch)) { - newLimit = bottomSortLimit.fetch; + if (HiveCalciteUtil.limitRelNode(bottomSortLimit)) { + final RexBuilder rexBuilder = topSortLimit.getCluster().getRexBuilder(); + int topOffset = topSortLimit.offset == null ? 0 : RexLiteral.intValue(topSortLimit.offset); + int topLimit = RexLiteral.intValue(topSortLimit.fetch); + int bottomOffset = bottomSortLimit.offset == null ? 0 : RexLiteral.intValue(bottomSortLimit.offset); + int bottomLimit = RexLiteral.intValue(bottomSortLimit.fetch); + // if offset in top operator does not surpass the number of tuples coming from below + if (topOffset < bottomOffset + bottomLimit) { + newOffset = bottomOffset + topOffset == 0 ? null : + rexBuilder.makeExactLiteral(BigDecimal.valueOf(bottomOffset + topOffset)); + // newLimit + if (topOffset + topLimit > bottomLimit) { + newLimit = rexBuilder.makeExactLiteral(BigDecimal.valueOf(bottomLimit - topOffset)); + } else { + newLimit = topSortLimit.fetch; + } + } else { + // if offset in top operator surpasses the number of tuples coming from below, + // we need to create a new limit 0 + newOffset = null; + newLimit = rexBuilder.makeExactLiteral(BigDecimal.valueOf(0)); + } } else { + // Bottom operator does not contain offset/fetch + newOffset = topSortLimit.offset; newLimit = topSortLimit.fetch; } final HiveSortLimit newSort = bottomSortLimit.copy(bottomSortLimit.getTraitSet(), - bottomSortLimit.getInput(), bottomSortLimit.collation, null, newLimit); + bottomSortLimit.getInput(), bottomSortLimit.collation, newOffset, newLimit); call.transformTo(newSort); } diff --git ql/src/test/queries/clientpositive/limit_join_transpose.q ql/src/test/queries/clientpositive/limit_join_transpose.q index 98eefe9..4bb8fb5 100644 --- ql/src/test/queries/clientpositive/limit_join_transpose.q +++ ql/src/test/queries/clientpositive/limit_join_transpose.q @@ -60,6 +60,7 @@ from src src1 right outer join ( on src1.key = src2.key limit 1; + set hive.optimize.limitjointranspose.reductionpercentage=1f; set hive.optimize.limitjointranspose.reductiontuples=0; @@ -98,3 +99,106 @@ from src src1 right outer join ( on src1.key = src2.key order by src2.key limit 1; + + +set hive.mapred.mode=nonstrict; +set hive.optimize.limitjointranspose=false; + +explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1; + +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1; + + +set hive.optimize.limitjointranspose=true; +set hive.optimize.limitjointranspose.reductionpercentage=0.0001f; +set hive.optimize.limitjointranspose.reductiontuples=10; + +explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1; + +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1; + + +set hive.optimize.limitjointranspose.reductionpercentage=0.1f; +set hive.optimize.limitjointranspose.reductiontuples=10; + +explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1; + +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1; + +explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1; + +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1; + + +set hive.optimize.limitjointranspose.reductionpercentage=1f; +set hive.optimize.limitjointranspose.reductiontuples=0; + +explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1; + +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1; + +explain +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 offset 1; + +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 offset 1; diff --git ql/src/test/results/clientpositive/limit_join_transpose.q.out ql/src/test/results/clientpositive/limit_join_transpose.q.out index 3181cdf..fa10a52 100644 --- ql/src/test/results/clientpositive/limit_join_transpose.q.out +++ ql/src/test/results/clientpositive/limit_join_transpose.q.out @@ -874,3 +874,892 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 0 val_0 +PREHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +PREHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +PREHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 offset 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 offset 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0