diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveLimitProjectPullUpConstantsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveLimitProjectPullUpConstantsRule.java
new file mode 100644
index 0000000..c046ae7
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveLimitProjectPullUpConstantsRule.java
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.calcite.plan.RelOptPredicateList;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.calcite.util.Pair;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Planner rule that pulls up constant keys through a
+ * SortLimit operator.
+ */
+public class HiveLimitProjectPullUpConstantsRule extends RelOptRule {
+
+  protected static final Logger LOG = LoggerFactory.getLogger(HiveLimitProjectPullUpConstantsRule.class);
+
+  public static final HiveLimitProjectPullUpConstantsRule INSTANCE =
+      new HiveLimitProjectPullUpConstantsRule(HiveSortLimit.class, RelNode.class,
+          HiveRelFactories.HIVE_BUILDER);
+
+  private HiveLimitProjectPullUpConstantsRule(
+      Class<? extends Sort> sortClass,
+      Class<? extends RelNode> inputClass,
+      RelBuilderFactory relBuilderFactory) {
+    super(operand(sortClass,
+            operand(inputClass, any())),
+        relBuilderFactory, null);
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final Sort sort = call.rel(0);
+
+    final int count = sort.getInput().getRowType().getFieldCount();
+    if (count == 1) {
+      // No room for optimization since we cannot convert to an empty
+      // Project operator.
+      return;
+    }
+
+    final RexBuilder rexBuilder = sort.getCluster().getRexBuilder();
+    final RelMetadataQuery mq = RelMetadataQuery.instance();
+    final RelOptPredicateList predicates = mq.getPulledUpPredicates(sort.getInput());
+    if (predicates == null) {
+      return;
+    }
+
+    Map<RexNode, RexNode> constants = HiveReduceExpressionsRule.predicateConstants(
+        RexNode.class, rexBuilder, predicates);
+
+    // None of the expressions are constant. Nothing to do.
+    if (constants.isEmpty()) {
+      return;
+    }
+
+    if (count == constants.size()) {
+      // At least a single item in project is required.
+      final Map<RexNode, RexNode> map = new HashMap<>(constants);
+      map.remove(map.keySet().iterator().next());
+      constants = map;
+    }
+
+    // Create expressions for Project operators before and after the Sort
+    List<RelDataTypeField> fields = sort.getInput().getRowType().getFieldList();
+    List<Pair<RexNode, String>> newChildExprs = new ArrayList<>();
+    List<RexNode> topChildExprs = new ArrayList<>();
+    List<String> topChildExprsFields = new ArrayList<>();
+    for (int i = 0; i < count; i++) {
+      RexNode expr = rexBuilder.makeInputRef(sort.getInput(), i);
+      RelDataTypeField field = fields.get(i);
+      if (constants.containsKey(expr)) {
+        topChildExprs.add(constants.get(expr));
+        topChildExprsFields.add(field.getName());
+      } else {
+        newChildExprs.add(Pair.of(expr, field.getName()));
+        topChildExprs.add(expr);
+        topChildExprsFields.add(field.getName());
+      }
+    }
+
+    // Update field collations
+    final Mappings.TargetMapping mapping =
+        RelOptUtil.permutation(Pair.left(newChildExprs), sort.getInput().getRowType()).inverse();
+    List<RelFieldCollation> fieldCollations = new ArrayList<>();
+    for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) {
+      final int target = mapping.getTargetOpt(fc.getFieldIndex());
+      if (target < 0) {
+        // It is a constant, we can ignore it
+        continue;
+      }
+      fieldCollations.add(fc.copy(target));
+    }
+
+    // Update top Project positions
+    topChildExprs = ImmutableList.copyOf(RexUtil.apply(mapping, topChildExprs));
+
+    // Create new Project-Sort-Project sequence
+    final RelBuilder relBuilder = call.builder();
+    relBuilder.push(sort.getInput());
+    relBuilder.project(Pair.left(newChildExprs), Pair.right(newChildExprs));
+    final ImmutableList<RexNode> sortFields =
+        relBuilder.fields(RelCollations.of(fieldCollations));
+    relBuilder.sortLimit(sort.offset == null ? -1 : RexLiteral.intValue(sort.offset),
+        sort.fetch == null ? -1 : RexLiteral.intValue(sort.fetch), sortFields);
+    relBuilder.project(topChildExprs, topChildExprsFields);
+
+    call.transformTo(relBuilder.build());
+  }
+
+}
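
Note on the rewrite above: the rule turns SortLimit(input) into Project(SortLimit(Project(input))), where the inner Project drops the columns whose constant value is implied by the input's pulled-up predicates and the outer Project restores them as literals, so the sort and the shuffle carry fewer columns. A minimal driver sketch follows; it is not part of the patch, and the class name, the `root` plan, and the plan shapes in the comments are illustrative assumptions:

    import org.apache.calcite.plan.hep.HepMatchOrder;
    import org.apache.calcite.plan.hep.HepPlanner;
    import org.apache.calcite.plan.hep.HepProgram;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.RelNode;
    import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveLimitProjectPullUpConstantsRule;

    final class PullUpConstantsDriverSketch {
      // Roughly: if metadata says some input columns are constant (e.g. a
      // filter ds = '2008-04-08' below the sort), then a plan such as
      //   HiveSortLimit(sort0=[$1], fetch=[5])   over (ds, key)
      // becomes
      //   HiveProject(ds=['2008-04-08'], key=[$0])
      //     HiveSortLimit(sort0=[$0], fetch=[5])
      //       HiveProject(key=[$0])
      // so the constant column no longer flows through the sort.
      static RelNode pullUpConstants(RelNode root) {
        HepProgram program = new HepProgramBuilder()
            .addMatchOrder(HepMatchOrder.BOTTOM_UP)
            .addRuleInstance(HiveLimitProjectPullUpConstantsRule.INSTANCE)
            .build();
        HepPlanner planner = new HepPlanner(program);
        planner.setRoot(root);
        return planner.findBestExp();
      }
    }

Inside Hive this wiring is not needed; the rule is registered in CalcitePlanner's applyPreJoinOrderingTransforms, as the CalcitePlanner.java hunk further down shows.
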
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
index 9006f45..2fe9b75 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
@@ -396,6 +396,131 @@ protected static void findReducibleExps(RelDataTypeFactory typeFactory,
     assert constExps.size() == addCasts.size();
   }
 
+  /** Creates a map containing each (e, constant) pair that occurs within
+   * a predicate list.
+   *
+   * @param clazz Class of expression that is considered constant
+   * @param rexBuilder Rex builder
+   * @param predicates Predicate list
+   * @param <C> what to consider a constant: {@link RexLiteral} to use a narrow
+   *            definition of constant, or {@link RexNode} to use
+   *            {@link RexUtil#isConstant(RexNode)}
+   * @return Map from values to constants
+   */
+  public static <C extends RexNode> ImmutableMap<RexNode, C> predicateConstants(
+      Class<C> clazz, RexBuilder rexBuilder, RelOptPredicateList predicates) {
+    // We cannot use an ImmutableMap.Builder here. If there are multiple entries
+    // with the same key (e.g. "WHERE deptno = 1 AND deptno = 2"), it doesn't
+    // matter which we take, so the latter will replace the former.
+    // The basic idea is to find all the pairs of RexNode = RexLiteral:
+    // (1) If a predicate is not an EQUALS with two operands, the nodes it
+    //     references are added to the exclude set.
+    // (2) It is OK if a RexNode is equal to the same RexLiteral several times
+    //     (e.g. "WHERE deptno = 1 AND deptno = 1").
+    // (3) Inconsistent constraints (e.g. "WHERE deptno = 1 AND deptno = 2")
+    //     remove the node from the map and exclude it from the result.
+    final Map<RexNode, C> map = new HashMap<>();
+    final Set<RexNode> excludeSet = new HashSet<>();
+    for (RexNode predicate : predicates.pulledUpPredicates) {
+      gatherConstraints(clazz, predicate, map, excludeSet, rexBuilder);
+    }
+    final ImmutableMap.Builder<RexNode, C> builder =
+        ImmutableMap.builder();
+    for (Map.Entry<RexNode, C> entry : map.entrySet()) {
+      RexNode rexNode = entry.getKey();
+      if (!overlap(rexNode, excludeSet)) {
+        builder.put(rexNode, entry.getValue());
+      }
+    }
+    return builder.build();
+  }
+
+  private static <C extends RexNode> void gatherConstraints(Class<C> clazz,
+      RexNode predicate, Map<RexNode, C> map, Set<RexNode> excludeSet,
+      RexBuilder rexBuilder) {
+    if (predicate.getKind() != SqlKind.EQUALS) {
+      decompose(excludeSet, predicate);
+      return;
+    }
+    final List<RexNode> operands = ((RexCall) predicate).getOperands();
+    if (operands.size() != 2) {
+      decompose(excludeSet, predicate);
+      return;
+    }
+    // if it reaches here, we have rexNode equals rexNode
+    final RexNode left = operands.get(0);
+    final RexNode right = operands.get(1);
+    // note that literals are immutable too, and they can only be compared
+    // through values.
+    gatherConstraint(clazz, left, right, map, excludeSet, rexBuilder);
+    gatherConstraint(clazz, right, left, map, excludeSet, rexBuilder);
+  }
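
To make the gather/exclude contract above concrete, here is a self-contained sketch with plain strings and integers standing in for RexNode keys and constants. ConstraintSketch and all of its names are invented for illustration, and it simplifies one detail: the real method defers the exclusion check to the final overlap pass.

    import java.util.AbstractMap.SimpleEntry;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    final class ConstraintSketch {
      static Map<String, Integer> constants(List<Map.Entry<String, Integer>> equalities) {
        Map<String, Integer> map = new HashMap<>();
        Set<String> excludeSet = new HashSet<>();
        for (Map.Entry<String, Integer> e : equalities) {
          if (excludeSet.contains(e.getKey())) {
            continue;                            // key already known inconsistent
          }
          Integer existing = map.get(e.getKey());
          if (existing == null) {
            map.put(e.getKey(), e.getValue());   // first constraint: record it
          } else if (!existing.equals(e.getValue())) {
            map.remove(e.getKey());              // conflict: drop and exclude
            excludeSet.add(e.getKey());
          }
        }
        return map;
      }

      public static void main(String[] args) {
        // "WHERE deptno = 1 AND deptno = 1" -> {deptno=1}
        System.out.println(constants(Arrays.asList(
            new SimpleEntry<>("deptno", 1), new SimpleEntry<>("deptno", 1))));
        // "WHERE deptno = 1 AND deptno = 2" -> {} (inconsistent, excluded)
        System.out.println(constants(Arrays.asList(
            new SimpleEntry<>("deptno", 1), new SimpleEntry<>("deptno", 2))));
      }
    }
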
+
+  /** Returns whether a value of {@code type2} can be assigned to a variable
+   * of {@code type1}.
+   *
+   * <p>For example:
+   * <ul>
+   *   <li>{@code canAssignFrom(BIGINT, TINYINT)} returns {@code true}</li>
+   *   <li>{@code canAssignFrom(TINYINT, BIGINT)} returns {@code false}</li>
+   *   <li>{@code canAssignFrom(BIGINT, VARCHAR)} returns {@code false}</li>
+   * </ul>
+   */
+  private static boolean canAssignFrom(RelDataType type1, RelDataType type2) {
+    final SqlTypeName name1 = type1.getSqlTypeName();
+    final SqlTypeName name2 = type2.getSqlTypeName();
+    if (name1.getFamily() == name2.getFamily()) {
+      switch (name1.getFamily()) {
+      case NUMERIC:
+        return name1.compareTo(name2) >= 0;
+      default:
+        return true;
+      }
+    }
+    return false;
+  }
+
+  private static <C extends RexNode> void gatherConstraint(Class<C> clazz,
+      RexNode left, RexNode right, Map<RexNode, C> map, Set<RexNode> excludeSet,
+      RexBuilder rexBuilder) {
+    if (!clazz.isInstance(right)) {
+      return;
+    }
+    if (!RexUtil.isConstant(right)) {
+      return;
+    }
+    C constant = clazz.cast(right);
+    if (excludeSet.contains(left)) {
+      return;
+    }
+    final C existedValue = map.get(left);
+    if (existedValue == null) {
+      switch (left.getKind()) {
+      case CAST:
+        // Convert "CAST(c) = literal" to "c = literal", as long as it is a
+        // widening cast.
+        final RexNode operand = ((RexCall) left).getOperands().get(0);
+        if (canAssignFrom(left.getType(), operand.getType())) {
+          final RexNode castRight =
+              rexBuilder.makeCast(operand.getType(), constant);
+          if (castRight instanceof RexLiteral) {
+            left = operand;
+            constant = clazz.cast(castRight);
+          }
+        }
+      }
+      map.put(left, constant);
+    } else {
+      if (existedValue instanceof RexLiteral
+          && constant instanceof RexLiteral
+          && !((RexLiteral) existedValue).getValue()
+              .equals(((RexLiteral) constant).getValue())) {
+        // we found conflicting values, e.g. left = 10 and left = 20
+        map.remove(left);
+        excludeSet.add(left);
+      }
+    }
+  }
+
   protected static ImmutableMap<RexNode, RexNode> predicateConstants(
       RelOptPredicateList predicates) {
     // We cannot use an ImmutableMap.Builder here. If there are multiple entries
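
For reference, the widening-cast rewrite above ("CAST(c) = literal" to "c = literal") hinges on canAssignFrom, which for the NUMERIC family relies on SqlTypeName's declaration order (TINYINT before SMALLINT before INTEGER before BIGINT). A standalone sketch of the same comparison against Calcite's SqlTypeName; the class and method placement are invented for illustration:

    import org.apache.calcite.sql.type.SqlTypeName;

    final class CanAssignFromSketch {
      // Same family: for NUMERIC, a "later" SqlTypeName can hold any "earlier"
      // one; other families are treated as freely assignable within themselves.
      static boolean canAssignFrom(SqlTypeName name1, SqlTypeName name2) {
        if (name1.getFamily() == name2.getFamily()) {
          switch (name1.getFamily()) {
          case NUMERIC:
            return name1.compareTo(name2) >= 0;
          default:
            return true;
          }
        }
        return false;
      }

      public static void main(String[] args) {
        System.out.println(canAssignFrom(SqlTypeName.BIGINT, SqlTypeName.TINYINT));  // true
        System.out.println(canAssignFrom(SqlTypeName.TINYINT, SqlTypeName.BIGINT));  // false
        System.out.println(canAssignFrom(SqlTypeName.BIGINT, SqlTypeName.VARCHAR));  // false
      }
    }

So a constraint like CAST(tinyint_col AS BIGINT) = 10 can be recorded against tinyint_col, while a narrowing cast is left untouched.
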
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 8e00e0b..5b11138 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -153,6 +153,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveLimitProjectPullUpConstantsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
@@ -1163,6 +1164,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
     rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN);
     rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN);
     rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
+    rules.add(HiveLimitProjectPullUpConstantsRule.INSTANCE);
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
     basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
         rules.toArray(new RelOptRule[rules.size()]));
diff --git ql/src/test/queries/clientpositive/cbo_input26.q ql/src/test/queries/clientpositive/cbo_input26.q
new file mode 100644
index 0000000..40050f9
--- /dev/null
+++ ql/src/test/queries/clientpositive/cbo_input26.q
@@ -0,0 +1,54 @@
+set hive.mapred.mode=nonstrict;
+set hive.optimize.constant.propagation=false;
+
+explain
+select * from (
+ select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq;
+
+select * from (
+ select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq;
+
+explain
+select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq;
+
+select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq;
+
+explain
+select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq;
+
+select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq;
+
+explain
+select * from (
+ select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa
+ union all
+ select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq;
+
+select * from (
+ select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa
+ union all
+ select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq;
diff --git ql/src/test/results/clientpositive/cbo_input26.q.out ql/src/test/results/clientpositive/cbo_input26.q.out
new file mode 100644
index 0000000..5c4c771
--- /dev/null
+++ ql/src/test/results/clientpositive/cbo_input26.q.out
@@ -0,0 +1,596 @@
+PREHOOK: query: explain
+select * from (
+ select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from (
+ select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            Union
+              Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Limit
+                  Number of rows: 5
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '14' (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from (
+ select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (
+ select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+0	val_0	2008-04-08	11
+0	val_0	2008-04-08	11
+0	val_0	2008-04-08	11
+10	val_10	2008-04-08	11
+100	val_100	2008-04-08	11
+PREHOOK: query: explain
+select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: '2008-04-08' (type: string), _col0 (type: string), '11' (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            Union
+              Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Limit
+                  Number of rows: 5
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: '2008-04-08' (type: string), _col0 (type: string), '14' (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+2008-04-08	0	11
+2008-04-08	0	11
+2008-04-08	0	11
+2008-04-08	10	11
+2008-04-08	100	11
+PREHOOK: query: explain
+select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: '2008-04-08' (type: string), _col0 (type: string), '11' (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            Union
+              Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Limit
+                  Number of rows: 5
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: '2008-04-08' (type: string), _col0 (type: string), '14' (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (
+ select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa
+ union all
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+2008-04-08	0	11
+2008-04-08	0	11
+2008-04-08	0	11
+2008-04-08	10	11
+2008-04-08	100	11
+PREHOOK: query: explain
+select * from (
+ select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa
+ union all
+ select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from (
+ select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa
+ union all
+ select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 5
+                Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.1
+                  value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            Union
+              Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Limit
+                  Number of rows: 5
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from (
+ select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa
+ union all
+ select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (
+ select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa
+ union all
+ select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb
+)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+165	2008-04-08	val_165
+27	2008-04-08	val_27
+311	2008-04-08	val_311
+86	2008-04-08	val_86
+238	2008-04-08	val_238