commit 9d0e9bd19e240767e90a90112b9ade528c36cbff Author: Owen O'Malley Date: Fri May 22 10:10:28 2015 -0700 HIVE-10553. Remove parquet from the sarg api. diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java index 5c36564..fbbd5ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java @@ -29,7 +29,12 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder; +import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory; import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher; +import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; @@ -46,6 +51,7 @@ import parquet.filter2.compat.FilterCompat; import parquet.filter2.compat.RowGroupFilter; +import parquet.filter2.predicate.FilterApi; import parquet.filter2.predicate.FilterPredicate; import parquet.hadoop.ParquetFileReader; import parquet.hadoop.ParquetInputFormat; @@ -147,9 +153,10 @@ public ParquetRecordReaderWrapper( return null; } - FilterPredicate p = - SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown)) - .toFilterPredicate(); + SearchArgument sarg = + SearchArgumentFactory.create(Utilities.deserializeExpression + (serializedPushdown)); + FilterPredicate p = toFilterPredicate(sarg); if (p != null) { LOG.debug("Predicate filter for parquet is " + p.toString()); 
ParquetInputFormat.setFilterPredicate(conf, p); @@ -308,4 +315,93 @@ protected ParquetInputSplit getSplit( public List getFiltedBlocks() { return filtedBlocks; } + + /** + * Translate the search argument into the filter predicate used by Parquet. + * @return the sarg translated into a filter predicate, or null if no filter applies + */ + public static FilterPredicate toFilterPredicate(SearchArgument sarg) { + return translate(sarg.getExpression(), + sarg.getLeaves()); + } + + private static boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) { + return (op == PredicateLeaf.Operator.IN) || + (op == PredicateLeaf.Operator.BETWEEN); + } + + private static FilterPredicate translate(ExpressionTree root, + List leafs){ + FilterPredicate p = null; + switch (root.getOperator()) { + case OR: + for(ExpressionTree child: root.getChildren()) { + if (p == null) { + p = translate(child, leafs); + } else { + FilterPredicate right = translate(child, leafs); + // constant means no filter, ignore it when it is null + if(right != null){ + p = FilterApi.or(p, right); + } + } + } + return p; + case AND: + for(ExpressionTree child: root.getChildren()) { + if (p == null) { + p = translate(child, leafs); + } else { + FilterPredicate right = translate(child, leafs); + // constant means no filter, ignore it when it is null + if(right != null){ + p = FilterApi.and(p, right); + } + } + } + return p; + case NOT: + FilterPredicate op = translate(root.getChildren().get(0), leafs); + if (op != null) { + return FilterApi.not(op); + } else { + return null; + } + case LEAF: + return buildFilterPredicateFromPredicateLeaf(leafs.get(root.getLeaf())); + case CONSTANT: + return null;// no filter will be executed for constant + default: + throw new IllegalStateException("Unknown operator: " + + root.getOperator()); + } + } + + private static FilterPredicate buildFilterPredicateFromPredicateLeaf + (PredicateLeaf leaf) { + LeafFilterFactory leafFilterFactory = new LeafFilterFactory(); + FilterPredicateLeafBuilder builder; + try { + 
builder = leafFilterFactory + .getLeafFilterBuilderByType(leaf.getType()); + if (builder == null) { + return null; + } + if (isMultiLiteralsOperator(leaf.getOperator())) { + return builder.buildPredicate(leaf.getOperator(), + leaf.getLiteralList(), + leaf.getColumnName()); + } else { + return builder + .buildPredict(leaf.getOperator(), + leaf.getLiteral(), + leaf.getColumnName()); + } + } catch (Exception e) { + LOG.error("fail to build predicate filter leaf with errors" + e, e); + return null; + } + } + + } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index efe03ab..7997f9e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -22,7 +22,6 @@ import java.sql.Timestamp; import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.Collections; import java.util.Deque; import java.util.HashMap; import java.util.List; @@ -35,8 +34,6 @@ import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder; -import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -64,9 +61,6 @@ import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; -import parquet.filter2.predicate.FilterApi; -import parquet.filter2.predicate.FilterPredicate; - /** * The implementation of SearchArguments. 
*/ @@ -182,199 +176,6 @@ public int hashCode() { } } - static class ExpressionTree { - static enum Operator {OR, AND, NOT, LEAF, CONSTANT} - private final Operator operator; - private final List children; - private final int leaf; - private final TruthValue constant; - - ExpressionTree() { - operator = null; - children = null; - leaf = 0; - constant = null; - } - - ExpressionTree(Operator op, ExpressionTree... kids) { - operator = op; - children = new ArrayList(); - leaf = -1; - this.constant = null; - Collections.addAll(children, kids); - } - - ExpressionTree(int leaf) { - operator = Operator.LEAF; - children = null; - this.leaf = leaf; - this.constant = null; - } - - ExpressionTree(TruthValue constant) { - operator = Operator.CONSTANT; - children = null; - this.leaf = -1; - this.constant = constant; - } - - ExpressionTree(ExpressionTree other) { - this.operator = other.operator; - if (other.children == null) { - this.children = null; - } else { - this.children = new ArrayList(); - for(ExpressionTree child: other.children) { - children.add(new ExpressionTree(child)); - } - } - this.leaf = other.leaf; - this.constant = other.constant; - } - - TruthValue evaluate(TruthValue[] leaves) { - TruthValue result = null; - switch (operator) { - case OR: - for(ExpressionTree child: children) { - result = child.evaluate(leaves).or(result); - } - return result; - case AND: - for(ExpressionTree child: children) { - result = child.evaluate(leaves).and(result); - } - return result; - case NOT: - return children.get(0).evaluate(leaves).not(); - case LEAF: - return leaves[leaf]; - case CONSTANT: - return constant; - default: - throw new IllegalStateException("Unknown operator: " + operator); - } - } - - FilterPredicate translate(List leafs){ - FilterPredicate p = null; - switch (operator) { - case OR: - for(ExpressionTree child: children) { - if (p == null) { - p = child.translate(leafs); - } else { - FilterPredicate right = child.translate(leafs); - // constant means no filter, 
ignore it when it is null - if(right != null){ - p = FilterApi.or(p, right); - } - } - } - return p; - case AND: - for(ExpressionTree child: children) { - if (p == null) { - p = child.translate(leafs); - } else { - FilterPredicate right = child.translate(leafs); - // constant means no filter, ignore it when it is null - if(right != null){ - p = FilterApi.and(p, right); - } - } - } - return p; - case NOT: - FilterPredicate op = children.get(0).translate(leafs); - if (op != null) { - return FilterApi.not(op); - } else { - return null; - } - case LEAF: - return buildFilterPredicateFromPredicateLeaf(leafs.get(leaf)); - case CONSTANT: - return null;// no filter will be executed for constant - default: - throw new IllegalStateException("Unknown operator: " + operator); - } - } - - private FilterPredicate buildFilterPredicateFromPredicateLeaf(PredicateLeaf leaf) { - LeafFilterFactory leafFilterFactory = new LeafFilterFactory(); - FilterPredicateLeafBuilder builder; - try { - builder = leafFilterFactory - .getLeafFilterBuilderByType(leaf.getType()); - if (builder == null) { - return null; - } - if (isMultiLiteralsOperator(leaf.getOperator())) { - return builder.buildPredicate(leaf.getOperator(), - leaf.getLiteralList(), - leaf.getColumnName()); - } else { - return builder - .buildPredict(leaf.getOperator(), - leaf.getLiteral(), - leaf.getColumnName()); - } - } catch (Exception e) { - LOG.error("fail to build predicate filter leaf with errors" + e, e); - return null; - } - } - - private boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) { - return (op == PredicateLeaf.Operator.IN) || (op == PredicateLeaf.Operator.BETWEEN); - } - - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - switch (operator) { - case OR: - buffer.append("(or"); - for(ExpressionTree child: children) { - buffer.append(' '); - buffer.append(child.toString()); - } - buffer.append(')'); - break; - case AND: - buffer.append("(and"); - for(ExpressionTree child: 
children) { - buffer.append(' '); - buffer.append(child.toString()); - } - buffer.append(')'); - break; - case NOT: - buffer.append("(not "); - buffer.append(children.get(0)); - buffer.append(')'); - break; - case LEAF: - buffer.append("leaf-"); - buffer.append(leaf); - break; - case CONSTANT: - buffer.append(constant); - break; - } - return buffer.toString(); - } - - Operator getOperator() { - return operator; - } - - List getChildren() { - return children; - } - } - static class ExpressionBuilder { // max threshold for CNF conversion. having >8 elements in andList will be converted to maybe private static final int CNF_COMBINATIONS_THRESHOLD = 256; @@ -581,7 +382,7 @@ private ExpressionTree createLeaf(PredicateLeaf.Operator operator, private ExpressionTree negate(ExpressionTree expr) { ExpressionTree result = new ExpressionTree(ExpressionTree.Operator.NOT); - result.children.add(expr); + result.getChildren().add(expr); return result; } @@ -589,7 +390,7 @@ private void addChildren(ExpressionTree result, ExprNodeGenericFuncDesc node, List leafCache) { for(ExprNodeDesc child: node.getChildren()) { - result.children.add(parse(child, leafCache)); + result.getChildren().add(parse(child, leafCache)); } } @@ -665,24 +466,24 @@ private ExpressionTree parse(ExprNodeDesc expression, * nodes of the original expression. 
*/ static ExpressionTree pushDownNot(ExpressionTree root) { - if (root.operator == ExpressionTree.Operator.NOT) { - ExpressionTree child = root.children.get(0); - switch (child.operator) { + if (root.getOperator() == ExpressionTree.Operator.NOT) { + ExpressionTree child = root.getChildren().get(0); + switch (child.getOperator()) { case NOT: - return pushDownNot(child.children.get(0)); + return pushDownNot(child.getChildren().get(0)); case CONSTANT: - return new ExpressionTree(child.constant.not()); + return new ExpressionTree(child.getConstant().not()); case AND: root = new ExpressionTree(ExpressionTree.Operator.OR); - for(ExpressionTree kid: child.children) { - root.children.add(pushDownNot(new + for(ExpressionTree kid: child.getChildren()) { + root.getChildren().add(pushDownNot(new ExpressionTree(ExpressionTree.Operator.NOT, kid))); } break; case OR: root = new ExpressionTree(ExpressionTree.Operator.AND); - for(ExpressionTree kid: child.children) { - root.children.add(pushDownNot(new ExpressionTree + for(ExpressionTree kid: child.getChildren()) { + root.getChildren().add(pushDownNot(new ExpressionTree (ExpressionTree.Operator.NOT, kid))); } break; @@ -690,10 +491,10 @@ static ExpressionTree pushDownNot(ExpressionTree root) { default: break; } - } else if (root.children != null) { + } else if (root.getChildren() != null) { // iterate through children and push down not for each one - for(int i=0; i < root.children.size(); ++i) { - root.children.set(i, pushDownNot(root.children.get(i))); + for(int i=0; i < root.getChildren().size(); ++i) { + root.getChildren().set(i, pushDownNot(root.getChildren().get(i))); } } return root; @@ -707,13 +508,13 @@ static ExpressionTree pushDownNot(ExpressionTree root) { * @return The cleaned up expression */ static ExpressionTree foldMaybe(ExpressionTree expr) { - if (expr.children != null) { - for(int i=0; i < expr.children.size(); ++i) { - ExpressionTree child = foldMaybe(expr.children.get(i)); - if (child.constant == 
TruthValue.YES_NO_NULL) { - switch (expr.operator) { + if (expr.getChildren() != null) { + for(int i=0; i < expr.getChildren().size(); ++i) { + ExpressionTree child = foldMaybe(expr.getChildren().get(i)); + if (child.getConstant() == TruthValue.YES_NO_NULL) { + switch (expr.getOperator()) { case AND: - expr.children.remove(i); + expr.getChildren().remove(i); i -= 1; break; case OR: @@ -724,10 +525,10 @@ static ExpressionTree foldMaybe(ExpressionTree expr) { expr); } } else { - expr.children.set(i, child); + expr.getChildren().set(i, child); } } - if (expr.children.isEmpty()) { + if (expr.getChildren().isEmpty()) { return new ExpressionTree(TruthValue.YES_NO_NULL); } } @@ -748,15 +549,15 @@ private static void generateAllCombinations(List result, List andList, List nonAndList ) { - List kids = andList.get(0).children; + List kids = andList.get(0).getChildren(); if (result.isEmpty()) { for(ExpressionTree kid: kids) { ExpressionTree or = new ExpressionTree(ExpressionTree.Operator.OR); result.add(or); for(ExpressionTree node: nonAndList) { - or.children.add(new ExpressionTree(node)); + or.getChildren().add(new ExpressionTree(node)); } - or.children.add(kid); + or.getChildren().add(kid); } } else { List work = new ArrayList(result); @@ -764,7 +565,7 @@ private static void generateAllCombinations(List result, for(ExpressionTree kid: kids) { for(ExpressionTree or: work) { ExpressionTree copy = new ExpressionTree(or); - copy.children.add(kid); + copy.getChildren().add(kid); result.add(copy); } } @@ -783,23 +584,23 @@ private static void generateAllCombinations(List result, * @return the normalized expression */ static ExpressionTree convertToCNF(ExpressionTree root) { - if (root.children != null) { + if (root.getChildren() != null) { // convert all of the children to CNF - int size = root.children.size(); + int size = root.getChildren().size(); for(int i=0; i < size; ++i) { - root.children.set(i, convertToCNF(root.children.get(i))); + root.getChildren().set(i, 
convertToCNF(root.getChildren().get(i))); } - if (root.operator == ExpressionTree.Operator.OR) { + if (root.getOperator() == ExpressionTree.Operator.OR) { // a list of leaves that weren't under AND expressions List nonAndList = new ArrayList(); // a list of AND expressions that we need to distribute List andList = new ArrayList(); - for(ExpressionTree child: root.children) { - if (child.operator == ExpressionTree.Operator.AND) { + for(ExpressionTree child: root.getChildren()) { + if (child.getOperator() == ExpressionTree.Operator.AND) { andList.add(child); - } else if (child.operator == ExpressionTree.Operator.OR) { + } else if (child.getOperator() == ExpressionTree.Operator.OR) { // pull apart the kids of the OR expression - for(ExpressionTree grandkid: child.children) { + for(ExpressionTree grandkid: child.getChildren()) { nonAndList.add(grandkid); } } else { @@ -809,7 +610,7 @@ static ExpressionTree convertToCNF(ExpressionTree root) { if (!andList.isEmpty()) { if (checkCombinationsThreshold(andList)) { root = new ExpressionTree(ExpressionTree.Operator.AND); - generateAllCombinations(root.children, andList, nonAndList); + generateAllCombinations(root.getChildren(), andList, nonAndList); } else { root = new ExpressionTree(TruthValue.YES_NO_NULL); } @@ -822,7 +623,7 @@ static ExpressionTree convertToCNF(ExpressionTree root) { private static boolean checkCombinationsThreshold(List andList) { int numComb = 1; for (ExpressionTree tree : andList) { - numComb *= tree.children.size(); + numComb *= tree.getChildren().size(); if (numComb > CNF_COMBINATIONS_THRESHOLD) { return false; } @@ -837,33 +638,33 @@ private static boolean checkCombinationsThreshold(List andList) * potentially modified children. 
*/ static ExpressionTree flatten(ExpressionTree root) { - if (root.children != null) { + if (root.getChildren() != null) { // iterate through the index, so that if we add more children, // they don't get re-visited - for(int i=0; i < root.children.size(); ++i) { - ExpressionTree child = flatten(root.children.get(i)); + for(int i=0; i < root.getChildren().size(); ++i) { + ExpressionTree child = flatten(root.getChildren().get(i)); // do we need to flatten? - if (child.operator == root.operator && - child.operator != ExpressionTree.Operator.NOT) { + if (child.getOperator() == root.getOperator() && + child.getOperator() != ExpressionTree.Operator.NOT) { boolean first = true; - for(ExpressionTree grandkid: child.children) { + for(ExpressionTree grandkid: child.getChildren()) { // for the first grandkid replace the original parent if (first) { first = false; - root.children.set(i, grandkid); + root.getChildren().set(i, grandkid); } else { - root.children.add(++i, grandkid); + root.getChildren().add(++i, grandkid); } } } else { - root.children.set(i, child); + root.getChildren().set(i, child); } } // if we have a singleton AND or OR, just return the child - if ((root.operator == ExpressionTree.Operator.OR || - root.operator == ExpressionTree.Operator.AND) && - root.children.size() == 1) { - return root.children.get(0); + if ((root.getOperator() == ExpressionTree.Operator.OR || + root.getOperator() == ExpressionTree.Operator.AND) && + root.getChildren().size() == 1) { + return root.getChildren().get(0); } } return root; @@ -882,13 +683,13 @@ private ExpressionTree buildLeafList(ExpressionTree expr, List leafCache, Map lookup) { - if (expr.children != null) { - for(int i=0; i < expr.children.size(); ++i) { - expr.children.set(i, buildLeafList(expr.children.get(i), leafCache, - lookup)); + if (expr.getChildren() != null) { + for(int i=0; i < expr.getChildren().size(); ++i) { + expr.getChildren().set(i, buildLeafList(expr.getChildren().get(i), + leafCache, lookup)); } - } 
else if (expr.operator == ExpressionTree.Operator.LEAF) { - PredicateLeaf leaf = leafCache.get(expr.leaf); + } else if (expr.getOperator() == ExpressionTree.Operator.LEAF) { + PredicateLeaf leaf = leafCache.get(expr.getLeaf()); ExpressionTree val = lookup.get(leaf); if (val == null) { val = new ExpressionTree(leaves.size()); @@ -969,7 +770,8 @@ public TruthValue evaluate(TruthValue[] leaves) { return expression == null ? TruthValue.YES : expression.evaluate(leaves); } - ExpressionTree getExpression() { + @Override + public ExpressionTree getExpression() { return expression; } @@ -1000,11 +802,6 @@ static SearchArgument fromKryo(String value) { return new Kryo().readObject(input, SearchArgumentImpl.class); } - @Override - public FilterPredicate toFilterPredicate() { - return expression.translate(leaves); - } - private static class BuilderImpl implements Builder { private final Deque currentTree = new ArrayDeque(); @@ -1016,7 +813,7 @@ public Builder startOr() { ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR); if (currentTree.size() != 0) { ExpressionTree parent = currentTree.getFirst(); - parent.children.add(node); + parent.getChildren().add(node); } currentTree.addFirst(node); return this; @@ -1027,7 +824,7 @@ public Builder startAnd() { ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND); if (currentTree.size() != 0) { ExpressionTree parent = currentTree.getFirst(); - parent.children.add(node); + parent.getChildren().add(node); } currentTree.addFirst(node); return this; @@ -1038,7 +835,7 @@ public Builder startNot() { ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT); if (currentTree.size() != 0) { ExpressionTree parent = currentTree.getFirst(); - parent.children.add(node); + parent.getChildren().add(node); } currentTree.addFirst(node); return this; @@ -1047,12 +844,12 @@ public Builder startNot() { @Override public Builder end() { root = currentTree.removeFirst(); - if (root.children.size() == 0) { + if 
(root.getChildren().size() == 0) { throw new IllegalArgumentException("Can't create expression " + root + " with no children."); } - if (root.operator == ExpressionTree.Operator.NOT && - root.children.size() != 1) { + if (root.getOperator() == ExpressionTree.Operator.NOT && + root.getChildren().size() != 1) { throw new IllegalArgumentException("Can't create not expression " + root + " with more than 1 child."); } @@ -1121,7 +918,7 @@ public Builder lessThan(String column, Object literal) { new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN, getType(box), column, box, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1133,7 +930,7 @@ public Builder lessThanEquals(String column, Object literal) { new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS, getType(box), column, box, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1145,7 +942,7 @@ public Builder equals(String column, Object literal) { new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS, getType(box), column, box, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1157,7 +954,7 @@ public Builder nullSafeEquals(String column, Object literal) { new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS, getType(box), column, box, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1177,7 +974,7 @@ public Builder in(String column, Object... 
literal) { new PredicateLeafImpl(PredicateLeaf.Operator.IN, getType(argList.get(0)), column, null, argList); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1188,7 +985,7 @@ public Builder isNull(String column) { new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING, column, null, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1202,7 +999,7 @@ public Builder between(String column, Object lower, Object upper) { new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN, getType(argList.get(0)), column, null, argList); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java new file mode 100644 index 0000000..2dd3a45 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.sarg; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * The inner representation of the SearchArgument. Most users should not + * need this interface, it is only for file formats that need to translate + * the SearchArgument into an internal form. + */ +public class ExpressionTree { + public enum Operator {OR, AND, NOT, LEAF, CONSTANT} + private final Operator operator; + private final List children; + private final int leaf; + private final SearchArgument.TruthValue constant; + + ExpressionTree() { + operator = null; + children = null; + leaf = 0; + constant = null; + } + + ExpressionTree(Operator op, ExpressionTree... kids) { + operator = op; + children = new ArrayList(); + leaf = -1; + this.constant = null; + Collections.addAll(children, kids); + } + + ExpressionTree(int leaf) { + operator = Operator.LEAF; + children = null; + this.leaf = leaf; + this.constant = null; + } + + ExpressionTree(SearchArgument.TruthValue constant) { + operator = Operator.CONSTANT; + children = null; + this.leaf = -1; + this.constant = constant; + } + + ExpressionTree(ExpressionTree other) { + this.operator = other.operator; + if (other.children == null) { + this.children = null; + } else { + this.children = new ArrayList(); + for(ExpressionTree child: other.children) { + children.add(new ExpressionTree(child)); + } + } + this.leaf = other.leaf; + this.constant = other.constant; + } + + public SearchArgument.TruthValue evaluate(SearchArgument.TruthValue[] leaves + ) { + SearchArgument.TruthValue result = null; + switch (operator) { + case OR: + for(ExpressionTree child: children) { + result = child.evaluate(leaves).or(result); + } + return result; + case AND: + for(ExpressionTree child: children) { + result = child.evaluate(leaves).and(result); + } + return result; + case NOT: + return 
children.get(0).evaluate(leaves).not(); + case LEAF: + return leaves[leaf]; + case CONSTANT: + return constant; + default: + throw new IllegalStateException("Unknown operator: " + operator); + } + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + switch (operator) { + case OR: + buffer.append("(or"); + for(ExpressionTree child: children) { + buffer.append(' '); + buffer.append(child.toString()); + } + buffer.append(')'); + break; + case AND: + buffer.append("(and"); + for(ExpressionTree child: children) { + buffer.append(' '); + buffer.append(child.toString()); + } + buffer.append(')'); + break; + case NOT: + buffer.append("(not "); + buffer.append(children.get(0)); + buffer.append(')'); + break; + case LEAF: + buffer.append("leaf-"); + buffer.append(leaf); + break; + case CONSTANT: + buffer.append(constant); + break; + } + return buffer.toString(); + } + + public Operator getOperator() { + return operator; + } + + public List getChildren() { + return children; + } + + public SearchArgument.TruthValue getConstant() { + return constant; + } + + public int getLeaf() { + return leaf; + } +} + diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java index 9be54da..beee01a 100644 --- serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java +++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.io.sarg; -import parquet.filter2.predicate.FilterPredicate; - import java.util.List; /** @@ -159,6 +157,12 @@ public boolean isNeeded() { public List getLeaves(); /** + * Get the expression tree. This should only be needed for file formats that + * need to translate the expression to an internal form. + */ + public ExpressionTree getExpression(); + + /** * Evaluate the entire predicate based on the values for the leaf predicates. 
* @param leaves the value of each leaf predicate * @return the value of hte entire predicate @@ -177,12 +181,6 @@ public boolean isNeeded() { public String toKryo(); /** - * Translate the search argument to the filter predicate parquet used - * @return - */ - public FilterPredicate toFilterPredicate(); - - /** * A builder object for contexts outside of Hive where it isn't easy to * get a ExprNodeDesc. The user must call startOr, startAnd, or startNot * before adding any leaves.