diff --git hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out index f92371d..332c5e6 100644 --- hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out +++ hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out @@ -399,22 +399,36 @@ explain select * from hbase_pushdown where (case when key<'90' then 2 else 4 end) > 3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hbase_pushdown + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (not NVL((key < '90'),false)) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: hbase_pushdown - filterExpr: (key >= '90') (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink + ListSink PREHOOK: query: -- with a predicate which is under an OR, so it should -- be ignored by pushdown diff --git 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java new file mode 100644 index 0000000..1c64d64 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.plan.Context; +import org.apache.calcite.plan.Contexts; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.server.CalciteServerStatement; +import org.apache.calcite.tools.FrameworkConfig; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.tools.RelBuilderFactory; + + +/** + * Builder for relational expressions in Hive. + * + *

<p>{@code RelBuilder} does not make possible anything that you could not + * also accomplish by calling the factory methods of the particular relational + * expression. But it makes common tasks more straightforward and concise. + * + *

It is not thread-safe. + */ +public class HiveRelBuilder extends RelBuilder { + + private HiveRelBuilder(Context context, RelOptCluster cluster, RelOptSchema relOptSchema) { + super(context, cluster, relOptSchema); + } + + /** Creates a RelBuilder. */ + public static RelBuilder create(FrameworkConfig config) { + final RelOptCluster[] clusters = {null}; + final RelOptSchema[] relOptSchemas = {null}; + Frameworks.withPrepare( + new Frameworks.PrepareAction(config) { + public Void apply(RelOptCluster cluster, RelOptSchema relOptSchema, + SchemaPlus rootSchema, CalciteServerStatement statement) { + clusters[0] = cluster; + relOptSchemas[0] = relOptSchema; + return null; + } + }); + return new HiveRelBuilder(config.getContext(), clusters[0], relOptSchemas[0]); + } + + /** Creates a {@link RelBuilderFactory}, a partially-created RelBuilder. + * Just add a {@link RelOptCluster} and a {@link RelOptSchema} */ + public static RelBuilderFactory proto(final Context context) { + return new RelBuilderFactory() { + public RelBuilder create(RelOptCluster cluster, RelOptSchema schema) { + return new HiveRelBuilder(context, cluster, schema); + } + }; + } + + /** Creates a {@link RelBuilderFactory} that uses a given set of factories. */ + public static RelBuilderFactory proto(Object... 
factories) { + return proto(Contexts.of(factories)); + } + + @Override + public RelBuilder filter(Iterable predicates) { + final RexNode x = HiveRexUtil.simplify(cluster.getRexBuilder(), + RexUtil.composeConjunction(cluster.getRexBuilder(), predicates, false)); + if (!x.isAlwaysTrue()) { + final RelNode input = build(); + final RelNode filter = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(input, x); + return this.push(filter); + } + return this; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java index 971b446..cf93ed8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java @@ -41,7 +41,6 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.ImmutableBitSet; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; @@ -78,7 +77,7 @@ new HiveSetOpFactoryImpl(); public static final RelBuilderFactory HIVE_BUILDER = - RelBuilder.proto( + HiveRelBuilder.proto( Contexts.of(HIVE_PROJECT_FACTORY, HIVE_FILTER_FACTORY, HIVE_JOIN_FACTORY, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java index a5dcffb..3d061d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java @@ -114,25 +114,18 @@ private static RexNode simplifyCase(RexBuilder rexBuilder, RexCall call, final List operands = call.getOperands(); final List newOperands = new ArrayList<>(); final Set values = new HashSet<>(); - boolean constainsNullableCase = false; 
for (int i = 0; i < operands.size(); i++) { RexNode operand = operands.get(i); if (RexUtil.isCasePredicate(call, i)) { if (operand.isAlwaysTrue()) { // Predicate is always TRUE. Make value the ELSE and quit. newOperands.add(operands.get(i + 1)); - if (operand.getType().isNullable()) { - constainsNullableCase = true; - } break; } else if (operand.isAlwaysFalse() || RexUtil.isNull(operand)) { // Predicate is always FALSE or NULL. Skip predicate and value. ++i; continue; } - if (operand.getType().isNullable()) { - constainsNullableCase = true; - } } else { if (unknownAsFalse && RexUtil.isNull(operand)) { values.add(rexBuilder.makeLiteral(false).toString()); @@ -147,8 +140,7 @@ private static RexNode simplifyCase(RexBuilder rexBuilder, RexCall call, return rexBuilder.makeCast(call.getType(), newOperands.get(newOperands.size() - 1)); } trueFalse: - if (call.getType().getSqlTypeName() == SqlTypeName.BOOLEAN && - (!constainsNullableCase || unknownAsFalse)) { + if (call.getType().getSqlTypeName() == SqlTypeName.BOOLEAN) { // Optimize CASE where every branch returns constant true or constant // false: // CASE @@ -157,9 +149,34 @@ private static RexNode simplifyCase(RexBuilder rexBuilder, RexCall call, // WHEN p3 THEN TRUE // ELSE FALSE // END + // Note that p1...pn cannot be nullable + // Another possible simplification: + // CASE + // WHEN p1 THEN TRUE + // WHEN p2 THEN TRUE + // ELSE FALSE + // END + // can be rewritten to p1 or p2 final List> pairs = casePairs(rexBuilder, newOperands); + RexNode elseValue = pairs.get(pairs.size()-1).getValue(); + if (elseValue.isAlwaysFalse() || + (unknownAsFalse && RexUtil.isNull(elseValue))) { + final List terms = new ArrayList<>(); + for (Ord> pair : Ord.zip(pairs)) { + if (!pair.e.getValue().isAlwaysTrue()) { + break; + } + terms.add(pair.e.getKey()); + } + if (terms.size() == pairs.size() - 1) { + return RexUtil.composeDisjunction(rexBuilder, terms, false); + } + } for (Ord> pair : Ord.zip(pairs)) { + if 
(pair.e.getKey().getType().isNullable()) { + break trueFalse; + } if (!pair.e.getValue().isAlwaysTrue() && !pair.e.getValue().isAlwaysFalse() && (!unknownAsFalse || !RexUtil.isNull(pair.e.getValue()))) { diff --git ql/src/test/results/clientpositive/fold_case.q.out ql/src/test/results/clientpositive/fold_case.q.out index f57da79..ec99197 100644 --- ql/src/test/results/clientpositive/fold_case.q.out +++ ql/src/test/results/clientpositive/fold_case.q.out @@ -67,10 +67,10 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <> '238') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (not NVL((key = '238'),false)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) mode: hash @@ -370,15 +370,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <> '238') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (not NVL((key = '238'),false)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CASE WHEN ((key = '238')) THEN (null) ELSE (false) END (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic 
stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -448,10 +448,10 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <> '238') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN ((key = '238')) THEN (null) ELSE (true) END (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) mode: hash diff --git ql/src/test/results/clientpositive/fold_when.q.out ql/src/test/results/clientpositive/fold_when.q.out index 4f3eb14..d56de7f 100644 --- ql/src/test/results/clientpositive/fold_when.q.out +++ ql/src/test/results/clientpositive/fold_when.q.out @@ -328,15 +328,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <> '238') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (not NVL((key = '238'),false)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -435,15 +435,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <> '11') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (not NVL((key = '11'),false)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat