diff --git hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
index f92371d..332c5e6 100644
--- hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
+++ hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
@@ -399,22 +399,36 @@ explain select * from hbase_pushdown
where (case when key<'90' then 2 else 4 end) > 3
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: hbase_pushdown
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (not NVL((key < '90'),false)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: hbase_pushdown
- filterExpr: (key >= '90') (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: -- with a predicate which is under an OR, so it should
-- be ignored by pushdown
diff --git hbase-handler/src/test/results/positive/hbase_pushdown.q.out hbase-handler/src/test/results/positive/hbase_pushdown.q.out
index d957a7c..39c03eb 100644
--- hbase-handler/src/test/results/positive/hbase_pushdown.q.out
+++ hbase-handler/src/test/results/positive/hbase_pushdown.q.out
@@ -297,7 +297,7 @@ STAGE PLANS:
alias: hbase_pushdown
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key <> 90) (type: boolean)
+ predicate: (not NVL((key = 90),false)) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
new file mode 100644
index 0000000..1c64d64
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.plan.Context;
+import org.apache.calcite.plan.Contexts;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptSchema;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.server.CalciteServerStatement;
+import org.apache.calcite.tools.FrameworkConfig;
+import org.apache.calcite.tools.Frameworks;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.tools.RelBuilderFactory;
+
+
+/**
+ * Builder for relational expressions in Hive.
+ *
+ * <p>{@code RelBuilder} does not make possible anything that you could not
+ * also accomplish by calling the factory methods of the particular relational
+ * expression. But it makes common tasks more straightforward and concise.
+ *
+ * <p>It is not thread-safe.
+ */
+public class HiveRelBuilder extends RelBuilder {
+
+  private HiveRelBuilder(Context context, RelOptCluster cluster, RelOptSchema relOptSchema) {
+    super(context, cluster, relOptSchema);
+  }
+
+  /** Creates a builder using the cluster and schema prepared from {@code config}. */
+  public static RelBuilder create(FrameworkConfig config) {
+    final RelOptCluster[] clusters = {null};
+    final RelOptSchema[] relOptSchemas = {null};
+    Frameworks.withPrepare(
+        new Frameworks.PrepareAction<Void>(config) {
+          public Void apply(RelOptCluster cluster, RelOptSchema relOptSchema,
+              SchemaPlus rootSchema, CalciteServerStatement statement) {
+            clusters[0] = cluster;
+            relOptSchemas[0] = relOptSchema;
+            return null;
+          }
+        });
+    return new HiveRelBuilder(config.getContext(), clusters[0], relOptSchemas[0]);
+  }
+
+  /** Creates a {@link RelBuilderFactory}, a partially-created RelBuilder.
+   * Just add a {@link RelOptCluster} and a {@link RelOptSchema}. */
+  public static RelBuilderFactory proto(final Context context) {
+    return new RelBuilderFactory() {
+      public RelBuilder create(RelOptCluster cluster, RelOptSchema schema) {
+        return new HiveRelBuilder(context, cluster, schema);
+      }
+    };
+  }
+
+  /** Creates a {@link RelBuilderFactory} that uses a given set of factories. */
+  public static RelBuilderFactory proto(Object... factories) {
+    return proto(Contexts.of(factories));
+  }
+
+  /** Pushes a Hive filter on the simplified conjunction, unless it is always true. */
+  @Override
+  public RelBuilder filter(Iterable<? extends RexNode> predicates) {
+    final RexNode x = HiveRexUtil.simplify(cluster.getRexBuilder(),
+        RexUtil.composeConjunction(cluster.getRexBuilder(), predicates, false));
+    if (!x.isAlwaysTrue()) {
+      final RelNode input = build();
+      final RelNode filter = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(input, x);
+      return this.push(filter);
+    }
+    return this;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
index 971b446..cf93ed8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
@@ -41,7 +41,6 @@
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
-import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
@@ -78,7 +77,7 @@
new HiveSetOpFactoryImpl();
public static final RelBuilderFactory HIVE_BUILDER =
- RelBuilder.proto(
+ HiveRelBuilder.proto(
Contexts.of(HIVE_PROJECT_FACTORY,
HIVE_FILTER_FACTORY,
HIVE_JOIN_FACTORY,
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java
index 73a67a8..d466378 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java
@@ -134,25 +134,18 @@ private static RexNode simplifyCase(RexBuilder rexBuilder, RexCall call,
final List<RexNode> operands = call.getOperands();
final List<RexNode> newOperands = new ArrayList<>();
final Set<String> values = new HashSet<>();
- boolean constainsNullableCase = false;
for (int i = 0; i < operands.size(); i++) {
RexNode operand = operands.get(i);
if (RexUtil.isCasePredicate(call, i)) {
if (operand.isAlwaysTrue()) {
// Predicate is always TRUE. Make value the ELSE and quit.
newOperands.add(operands.get(i + 1));
- if (operand.getType().isNullable()) {
- constainsNullableCase = true;
- }
break;
} else if (operand.isAlwaysFalse() || RexUtil.isNull(operand)) {
// Predicate is always FALSE or NULL. Skip predicate and value.
++i;
continue;
}
- if (operand.getType().isNullable()) {
- constainsNullableCase = true;
- }
} else {
if (unknownAsFalse && RexUtil.isNull(operand)) {
values.add(rexBuilder.makeLiteral(false).toString());
@@ -167,19 +160,52 @@ private static RexNode simplifyCase(RexBuilder rexBuilder, RexCall call,
return rexBuilder.makeCast(call.getType(), newOperands.get(newOperands.size() - 1));
}
trueFalse:
- if (call.getType().getSqlTypeName() == SqlTypeName.BOOLEAN &&
- (!constainsNullableCase || unknownAsFalse)) {
+ if (call.getType().getSqlTypeName() == SqlTypeName.BOOLEAN) {
// Optimize CASE where every branch returns constant true or constant
- // false:
+ // false.
+ final List<Pair<RexNode, RexNode>> pairs =
+ casePairs(rexBuilder, newOperands);
+ // 1) Possible simplification if unknown is treated as false:
+ // CASE
+ // WHEN p1 THEN TRUE
+ // WHEN p2 THEN TRUE
+ // ELSE FALSE
+ // END
+ // can be rewritten to: (p1 or p2)
+ if (unknownAsFalse) {
+ final List<RexNode> terms = new ArrayList<>();
+ int pos = 0;
+ for (; pos < pairs.size(); pos++) {
+ // True block
+ Pair<RexNode, RexNode> pair = pairs.get(pos);
+ if (!pair.getValue().isAlwaysTrue()) {
+ break;
+ }
+ terms.add(pair.getKey());
+ }
+ for (; pos < pairs.size(); pos++) {
+ // False block
+ Pair<RexNode, RexNode> pair = pairs.get(pos);
+ if (!pair.getValue().isAlwaysFalse() && !RexUtil.isNull(pair.getValue())) {
+ break;
+ }
+ }
+ if (pos == pairs.size()) {
+ return RexUtil.composeDisjunction(rexBuilder, terms, false);
+ }
+ }
+ // 2) Another simplification
// CASE
// WHEN p1 THEN TRUE
// WHEN p2 THEN FALSE
// WHEN p3 THEN TRUE
// ELSE FALSE
// END
- final List<Pair<RexNode, RexNode>> pairs =
- casePairs(rexBuilder, newOperands);
+ // if p1...pn cannot be nullable
for (Ord<Pair<RexNode, RexNode>> pair : Ord.zip(pairs)) {
+ if (pair.e.getKey().getType().isNullable()) {
+ break trueFalse;
+ }
if (!pair.e.getValue().isAlwaysTrue()
&& !pair.e.getValue().isAlwaysFalse()
&& (!unknownAsFalse || !RexUtil.isNull(pair.e.getValue()))) {
diff --git ql/src/test/results/clientpositive/fold_case.q.out ql/src/test/results/clientpositive/fold_case.q.out
index f57da79..ec99197 100644
--- ql/src/test/results/clientpositive/fold_case.q.out
+++ ql/src/test/results/clientpositive/fold_case.q.out
@@ -67,10 +67,10 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key <> '238') (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ predicate: (not NVL((key = '238'),false)) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
mode: hash
@@ -370,15 +370,15 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key <> '238') (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ predicate: (not NVL((key = '238'),false)) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: CASE WHEN ((key = '238')) THEN (null) ELSE (false) END (type: boolean)
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -448,10 +448,10 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key <> '238') (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ predicate: CASE WHEN ((key = '238')) THEN (null) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
mode: hash
diff --git ql/src/test/results/clientpositive/fold_when.q.out ql/src/test/results/clientpositive/fold_when.q.out
index 4f3eb14..d56de7f 100644
--- ql/src/test/results/clientpositive/fold_when.q.out
+++ ql/src/test/results/clientpositive/fold_when.q.out
@@ -328,15 +328,15 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key <> '238') (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ predicate: (not NVL((key = '238'),false)) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -435,15 +435,15 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key <> '11') (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ predicate: (not NVL((key = '11'),false)) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat