diff --git pom.xml pom.xml index c694980..543ce71 100644 --- pom.xml +++ pom.xml @@ -145,7 +145,7 @@ requires netty < 3.6.0 we force hadoops version --> 3.4.0.Final - 1.5.0 + 1.6.0rc3 0.12.0 2.5.0 1.0.1 diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index bdc2806..324cbf7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -2393,7 +2393,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, // the stats object is converted to text and comparison is performed. // When STRINGs are converted to other base types, NumberFormat exception // can occur in which case TruthValue.YES_NO_NULL value is returned - Object baseObj = predicate.getLiteral(); + Object baseObj = predicate.getOrcLiteral(); Object minValue = getConvertedStatsObj(min, baseObj); Object maxValue = getConvertedStatsObj(max, baseObj); Object predObj = getBaseObjectForComparison(baseObj, minValue); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java new file mode 100644 index 0000000..6422189 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java @@ -0,0 +1,76 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet; + +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import parquet.filter2.predicate.FilterApi; +import parquet.filter2.predicate.FilterPredicate; + +import java.util.List; + +/** + * base class for building parquet supported filter predicate in primary types. + */ +public abstract class FilterPredicateLeafBuilder { + /** + * build filter predicate with multiple constants + * + * @param op IN or BETWEEN + * @param constants + * @param columnName + * @return + */ + public FilterPredicate buildPredict(PredicateLeaf.Operator op, List constants, + String columnName) throws Exception { + FilterPredicate result = null; + switch (op) { + case IN: + for (Object o : constants) { + if (result == null) { + result = buildPredict(PredicateLeaf.Operator.EQUALS, o, columnName); + } else { + result = FilterApi.or(result, buildPredict(PredicateLeaf.Operator.EQUALS, o, + columnName)); + } + } + return result; + case BETWEEN: + if (constants.size() != 2) return result; + Object min = constants.get(0); + Object max = constants.get(1); + FilterPredicate lt = FilterApi.not(buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS, + min, columnName)); + FilterPredicate gt = buildPredict(PredicateLeaf.Operator.LESS_THAN, max, columnName); + result = FilterApi.and(gt, lt); + return result; + default: + return result; + } + } + + /** + * build predicate with a single constant + * + * @param op EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL + * @param constant + * @param columnName + * @return null or a FilterPredicate, null means no filter will be executed + */ + public abstract FilterPredicate buildPredict(PredicateLeaf.Operator op, Object constant, + String columnName) throws 
Exception; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java new file mode 100644 index 0000000..5b04668 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java @@ -0,0 +1,166 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet; + +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import parquet.filter2.predicate.FilterApi; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator; +import parquet.filter2.predicate.FilterPredicate; +import parquet.io.api.Binary; + +public class LeafFilterFactory { + class IntFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + /** + * op consists of EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL + * @param op + * @param constant + * @param columnName + * @return + */ + @Override + public FilterPredicate buildPredict(Operator op, Object constant, + String columnName) { + switch (op) { + case LESS_THAN: + return FilterApi.lt(FilterApi.intColumn(columnName), + ((Number) constant).intValue()); + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return FilterApi.eq(FilterApi.intColumn(columnName), + (constant == null) ? 
null : ((Number) constant).intValue()); + case LESS_THAN_EQUALS: + return FilterApi.ltEq(FilterApi.intColumn(columnName), + ((Number) constant).intValue()); + default: + return null; + } + } + } + + class LongFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + @Override + public FilterPredicate buildPredict(Operator op, Object constant, + String columnName) { + switch (op) { + case LESS_THAN: + return FilterApi.lt(FilterApi.longColumn(columnName), + ((Number) constant).longValue()); + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return FilterApi.eq(FilterApi.longColumn(columnName), + (constant == null) ? null : ((Number) constant).longValue()); + case LESS_THAN_EQUALS: + return FilterApi.ltEq(FilterApi.longColumn(columnName), + ((Number) constant).longValue()); + default: + return null; + } + } + } + + class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + + @Override + public FilterPredicate buildPredict(Operator op, Object constant, + String columnName) { + switch (op) { + case LESS_THAN: + return FilterApi.lt(FilterApi.doubleColumn(columnName), + ((Number) constant).doubleValue()); + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return FilterApi.eq(FilterApi.doubleColumn(columnName), + (constant == null) ? null : ((Number) constant).doubleValue()); + case LESS_THAN_EQUALS: + return FilterApi.ltEq(FilterApi.doubleColumn(columnName), + ((Number) constant).doubleValue()); + default: + return null; + } + } + } + + class BooleanFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + @Override + public FilterPredicate buildPredict(Operator op, Object constant, + String columnName) throws Exception{ + switch (op) { + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return FilterApi.eq(FilterApi.booleanColumn(columnName), + (constant == null) ? 
null : ((Boolean)constant).booleanValue()); + default: + return null; + } + } + } + + class BinaryFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + @Override + public FilterPredicate buildPredict(Operator op, Object constant, + String columnName) throws Exception{ + switch (op) { + case LESS_THAN: + return FilterApi.lt(FilterApi.binaryColumn(columnName), + Binary.fromString((String) constant)); + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return FilterApi.eq(FilterApi.binaryColumn(columnName), + (constant == null) ? null : Binary.fromString((String) constant)); + case LESS_THAN_EQUALS: + return FilterApi.ltEq(FilterApi.binaryColumn(columnName), + Binary.fromString((String) constant)); + default: + // should never be executed + return null; + } + } + } + + /** + * get leaf filter builder by FilterPredicateType, currently date, decimal and timestamp is not + * supported yet. + * @param type FilterPredicateType + * @return + */ + public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type){ + switch (type){ + case INTEGER: + return new IntFilterPredicateLeafBuilder(); + case LONG: + return new LongFilterPredicateLeafBuilder(); + case FLOAT: // float and double + return new DoubleFilterPredicateLeafBuilder(); + case STRING: // string, char, varchar + return new BinaryFilterPredicateLeafBuilder(); + case BOOLEAN: + return new BooleanFilterPredicateLeafBuilder(); + case DATE: + case DECIMAL: + case TIMESTAMP: + default: + return null; + } + } + + public enum FilterOp { + LessThan, + Equals + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java index f5da46d..4694b69 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java @@ -20,8 +20,14 @@ import 
org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.FileSplit; @@ -32,6 +38,7 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; +import parquet.filter2.predicate.FilterPredicate; import parquet.hadoop.ParquetFileReader; import parquet.hadoop.ParquetInputFormat; import parquet.hadoop.ParquetInputSplit; @@ -83,6 +90,8 @@ public ParquetRecordReaderWrapper( taskAttemptID = new TaskAttemptID(); } + this.setFilter(oldJobConf); + // create a TaskInputOutputContext final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(oldJobConf, taskAttemptID); @@ -110,6 +119,22 @@ public ParquetRecordReaderWrapper( } } + public void setFilter(final JobConf conf){ + FilterPredicate p = null; + String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR); + String columnNamesString = + conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR); + if ((serializedPushdown == null) && columnNamesString == null){ + return; + } + SearchArgument sarg; + if(serializedPushdown != null){ + sarg = SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown)); + p = sarg.toFilterPredicate(); + ParquetInputFormat.setFilterPredicate(conf, p); + } + } + @Override public void close() throws IOException { if (realReader != null) { diff --git 
ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index eeb9641..130adb8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -30,9 +30,13 @@ import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder; +import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -59,11 +63,14 @@ import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; +import parquet.filter2.predicate.FilterApi; +import parquet.filter2.predicate.FilterPredicate; /** * The implementation of SearchArguments. 
*/ final class SearchArgumentImpl implements SearchArgument { + public static final Log LOG = LogFactory.getLog(SearchArgumentImpl.class); static final class PredicateLeafImpl implements PredicateLeaf { private final Operator operator; @@ -98,7 +105,14 @@ public Operator getOperator() { } @Override - public Type getType() { + public Type getParquetType() { + return type; + } + + public Type getOrcType() { + if(type == Type.LONG){ + return Type.INTEGER; + } return type; } @@ -108,7 +122,22 @@ public String getColumnName() { } @Override - public Object getLiteral() { + public Object getOrcLiteral() { + // To get around a kryo 2.22 bug while deserialize a Timestamp into Date + // (https://github.com/EsotericSoftware/kryo/issues/88) + // When we see a Date, convert back into Timestamp + if (literal instanceof java.util.Date) { + return new Timestamp(((java.util.Date)literal).getTime()); + } + // adapt base type to what orc needs + if(literal instanceof Integer){ + return Long.valueOf(literal.toString()); + } + return literal; + } + + @Override + public Object getParquetLiteral() { // To get around a kryo 2.22 bug while deserialize a Timestamp into Date // (https://github.com/EsotericSoftware/kryo/issues/88) // When we see a Date, convert back into Timestamp @@ -120,6 +149,20 @@ public Object getLiteral() { @Override public List getLiteralList() { + // no need to cast + if (literalList == null || literalList.size() == 0 || !(literalList.get(0) instanceof + Integer)) { + return literalList; + } + List result = new ArrayList(); + for (Object o : literalList) { + result.add(Long.valueOf((Integer) o)); + } + return result; + } + + @Override + public List getParquetLiteralList() { return literalList; } @@ -254,6 +297,76 @@ TruthValue evaluate(TruthValue[] leaves) { } } + FilterPredicate translate(List leafs){ + FilterPredicate p = null; + switch (operator) { + case OR: + for(ExpressionTree child: children) { + if (p == null) { + p = child.translate(leafs); + } else { + 
FilterPredicate right = child.translate(leafs); + // constant means no filter, ignore it when it is null + if(right != null){ + p = FilterApi.and(p, right); + } + } + } + return p; + case AND: + for(ExpressionTree child: children) { + if (p == null) { + p = child.translate(leafs); + } else { + FilterPredicate right = child.translate(leafs); + // constant means no filter, ignore it when it is null + if(right != null){ + p = FilterApi.and(p, right); + } + } + } + return p; + case NOT: + FilterPredicate op = children.get(0).translate(leafs); + if(op != null){ + return FilterApi.not(op); + }else{ + return null; + } + case LEAF: + return buildFilterPredicateFromPredicateLeaf(leafs.get(leaf)); + case CONSTANT: + return null;// no filter will be executed for constant + default: + throw new IllegalStateException("Unknown operator: " + operator); + } + } + + private FilterPredicate buildFilterPredicateFromPredicateLeaf(PredicateLeaf leaf) { + LeafFilterFactory leafFilterFactory = new LeafFilterFactory(); + FilterPredicateLeafBuilder builder; + try { + if (isMultiLiteralsOperator(leaf.getOperator())) { + builder = leafFilterFactory.getLeafFilterBuilderByType(leaf.getParquetType()); + if (builder == null) return null; + return builder.buildPredict(leaf.getOperator(), leaf.getParquetLiteralList(), + leaf.getColumnName()); + } else { + builder = leafFilterFactory.getLeafFilterBuilderByType(leaf.getParquetType()); + if (builder == null) return null; + return builder.buildPredict(leaf.getOperator(), leaf.getParquetLiteral(), + leaf.getColumnName()); + } + } catch (Exception e) { + LOG.error("fail to build predicate filter leaf with errors", e); + return null; + } + } + + private boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) { + return (op == PredicateLeaf.Operator.IN) || (op == PredicateLeaf.Operator.BETWEEN); + } + @Override public String toString() { StringBuilder buffer = new StringBuilder(); @@ -314,8 +427,9 @@ Operator getOperator() { case BYTE: case SHORT: case 
INT: - case LONG: return PredicateLeaf.Type.INTEGER; + case LONG: + return PredicateLeaf.Type.LONG; case CHAR: case VARCHAR: case STRING: @@ -360,6 +474,8 @@ private static String getColumnName(ExprNodeGenericFuncDesc expr, private static Object boxLiteral(ExprNodeConstantDesc lit) { switch (getType(lit)) { case INTEGER: + return ((Number) lit.getValue()).intValue(); + case LONG: return ((Number) lit.getValue()).longValue(); case STRING: return StringUtils.stripEnd(lit.getValue().toString(), null); @@ -420,6 +536,7 @@ private ExpressionTree createLeaf(PredicateLeaf.Operator operator, if (type == null) { return new ExpressionTree(TruthValue.YES_NO_NULL); } + Object literal = null; List literalList = null; switch (operator) { @@ -903,6 +1020,11 @@ static SearchArgument fromKryo(String value) { return new Kryo().readObject(input, SearchArgumentImpl.class); } + @Override + public FilterPredicate toFilterPredicate() { + return expression.translate(leaves); + } + private static class BuilderImpl implements Builder { private final Deque currentTree = new ArrayDeque(); @@ -973,7 +1095,7 @@ private static Object boxLiteral(Object literal) { } else if (literal instanceof Byte || literal instanceof Short || literal instanceof Integer) { - return Long.valueOf(literal.toString()); + return Integer.valueOf(literal.toString()); } else if (literal instanceof Float) { // to avoid change in precision when upcasting float to double // we convert the literal to string and parse it as double. 
(HIVE-8460) @@ -987,10 +1109,11 @@ private static Object boxLiteral(Object literal) { private static PredicateLeaf.Type getType(Object literal) { if (literal instanceof Byte || literal instanceof Short || - literal instanceof Integer || - literal instanceof Long) { + literal instanceof Integer) { return PredicateLeaf.Type.INTEGER; - } else if (literal instanceof HiveChar || + } else if(literal instanceof Long){ + return PredicateLeaf.Type.LONG; + }else if (literal instanceof HiveChar || literal instanceof HiveVarchar || literal instanceof String) { return PredicateLeaf.Type.STRING; @@ -1005,7 +1128,7 @@ private static Object boxLiteral(Object literal) { literal instanceof BigDecimal) { return PredicateLeaf.Type.DECIMAL; } else if (literal instanceof Boolean) { - return PredicateLeaf.Type.BOOLEAN; + return PredicateLeaf.Type.BOOLEAN; } throw new IllegalArgumentException("Unknown type for literal " + literal); } @@ -1069,6 +1192,7 @@ public Builder in(String column, Object... literal) { for(Object lit: literal){ argList.add(boxLiteral(lit)); } + PredicateLeaf leaf = new PredicateLeafImpl(PredicateLeaf.Operator.IN, getType(argList.get(0)), column, null, argList); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java index 831ef8c..f6e9011 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java @@ -22,14 +22,13 @@ import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionBuilder; import 
org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.junit.Test; +import parquet.filter2.predicate.FilterPredicate; import java.beans.XMLDecoder; import java.io.ByteArrayInputStream; @@ -39,6 +38,7 @@ import java.util.Set; import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertNull; import static junit.framework.Assert.assertTrue; /** @@ -47,7 +47,7 @@ * to true and using a custom record reader that prints out the value of * hive.io.filter.expr.serialized in createRecordReader. This should be * replaced by generating the AST using the API and passing that in. - * + *

* In each case, the corresponding part of the where clause is in the * comment above the blob. */ @@ -76,12 +76,11 @@ private ExpressionTree constant(TruthValue val) { /** * Create a predicate leaf. This is used by another test. */ - public static - PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator, - PredicateLeaf.Type type, - String columnName, - Object literal, - List literalList) { + public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator, + PredicateLeaf.Type type, + String columnName, + Object literal, + List literalList) { return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName, literal, literalList); } @@ -134,7 +133,7 @@ public void testFlatten() throws Exception { ).toString()); assertEquals("(and leaf-1 leaf-2 leaf-3 leaf-4)", ExpressionBuilder.flatten(and(and(leaf(1), leaf(2)), - and(leaf(3),leaf(4)))).toString()); + and(leaf(3), leaf(4)))).toString()); assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4)", ExpressionBuilder.flatten(or(leaf(1), or(leaf(2), or(leaf(3), leaf(4))))).toString()); @@ -143,11 +142,11 @@ public void testFlatten() throws Exception { leaf(4))).toString()); assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4 leaf-5 leaf-6)", ExpressionBuilder.flatten(or(or(leaf(1), or(leaf(2), leaf(3))), - or(or(leaf(4),leaf(5)), leaf(6)))).toString()); + or(or(leaf(4), leaf(5)), leaf(6)))).toString()); assertEquals("(and (not leaf-1) leaf-2 (not leaf-3) leaf-4 (not leaf-5) leaf-6)", ExpressionBuilder.flatten(and(and(not(leaf(1)), and(leaf(2), not(leaf(3)))), and(and(leaf(4), not(leaf(5))), leaf(6))) - ).toString()); + ).toString()); assertEquals("(not (and leaf-1 leaf-2 leaf-3))", ExpressionBuilder.flatten(not(and(leaf(1), and(leaf(2), leaf(3)))) ).toString()); @@ -245,20 +244,20 @@ public void testCNF() throws Exception { private static void assertNoSharedNodes(ExpressionTree tree, Set seen - ) throws Exception { + ) throws Exception { if (seen.contains(tree) && tree.getOperator() != 
ExpressionTree.Operator.LEAF) { assertTrue("repeated node in expression " + tree, false); } seen.add(tree); if (tree.getChildren() != null) { - for(ExpressionTree child: tree.getChildren()) { + for (ExpressionTree child : tree.getChildren()) { assertNoSharedNodes(child, seen); } } } - private ExprNodeGenericFuncDesc getFuncDesc (String xmlSerialized) { + private ExprNodeGenericFuncDesc getFuncDesc(String xmlSerialized) { byte[] bytes; try { bytes = xmlSerialized.getBytes("UTF-8"); @@ -275,6 +274,7 @@ private ExprNodeGenericFuncDesc getFuncDesc (String xmlSerialized) { decoder.close(); } } + @Test public void testExpression1() throws Exception { // first_name = 'john' or @@ -749,59 +749,68 @@ public void testExpression1() throws Exception { List leaves = sarg.getLeaves(); assertEquals(9, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(and(and(and(and(and(and(eq(first_name, Binary{\"john\"}), " + + "not(lteq(first_name, Binary{\"greg\"}))), lt(first_name, Binary{\"alan\"})), " + + "not(lteq(id, 12))), not(lteq(id, 13))), lt(id, 15)), lt(id, 16)), eq(id, 30)), " + + "and(and(and(and(and(and(and(eq(first_name, Binary{\"john\"}), not(lteq(first_name, " + + "Binary{\"greg\"}))), lt(first_name, Binary{\"alan\"})), not(lteq(id, 12))), " + + "not(lteq(id, 13))), lt(id, 15)), lt(id, 16)), eq(first_name, Binary{\"owen\"})))"; + assertEquals(expected, p.toString()); + PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("john", leaf.getLiteral()); + assertEquals("john", leaf.getOrcLiteral()); leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, 
leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("greg", leaf.getLiteral()); + assertEquals("greg", leaf.getOrcLiteral()); leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral()); + assertEquals("alan", leaf.getOrcLiteral()); leaf = leaves.get(3); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral()); + assertEquals(12L, leaf.getOrcLiteral()); leaf = leaves.get(4); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(13L, leaf.getLiteral()); + assertEquals(13L, leaf.getOrcLiteral()); leaf = leaves.get(5); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(15L, leaf.getLiteral()); + assertEquals(15L, leaf.getOrcLiteral()); leaf = leaves.get(6); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(16L, leaf.getLiteral()); + assertEquals(16L, leaf.getOrcLiteral()); leaf = leaves.get(7); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + 
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(30L, leaf.getLiteral()); + assertEquals(30L, leaf.getOrcLiteral()); leaf = leaves.get(8); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("owen", leaf.getLiteral()); + assertEquals("owen", leaf.getOrcLiteral()); assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + " (not leaf-4) leaf-5 leaf-6 leaf-7)" + @@ -1017,30 +1026,35 @@ public void testExpression2() throws Exception { List leaves = sarg.getLeaves(); assertEquals(4, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(and(eq(first_name, null), not(eq(first_name, Binary{\"sue\"}))), " + + "not(lt(id, 12))), lteq(id, 4))"; + assertEquals(p.toString(), expected); + PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral()); + assertEquals(null, leaf.getOrcLiteral()); assertEquals(null, leaf.getLiteralList()); leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("sue", leaf.getLiteral()); + assertEquals("sue", leaf.getOrcLiteral()); leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); 
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral()); + assertEquals(12L, leaf.getOrcLiteral()); leaf = leaves.get(3); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(4L, leaf.getLiteral()); + assertEquals(4L, leaf.getOrcLiteral()); assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)", sarg.getExpression().toString()); @@ -1436,25 +1450,30 @@ public void testExpression3() throws Exception { List leaves = sarg.getLeaves(); assertEquals(3, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(and(lt(id, 45), not(lteq(id, 23))), eq(first_name, " + + "Binary{\"alan\"})), eq(last_name, Binary{\"smith\"}))"; + assertEquals(p.toString(), expected); + PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral()); + assertEquals(null, leaf.getOrcLiteral()); assertEquals(23L, leaf.getLiteralList().get(0)); assertEquals(45L, leaf.getLiteralList().get(1)); leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral()); + assertEquals("alan", leaf.getOrcLiteral()); leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.EQUALS, 
leaf.getOperator()); assertEquals("last_name", leaf.getColumnName()); - assertEquals("smith", leaf.getLiteral()); + assertEquals("smith", leaf.getOrcLiteral()); assertEquals("(and leaf-0 leaf-1 leaf-2)", sarg.getExpression().toString()); @@ -1646,21 +1665,26 @@ id in (34,50) */ List leaves = sarg.getLeaves(); assertEquals(3, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(not(eq(id, 12)), or(eq(first_name, Binary{\"john\"}), " + + "eq(first_name, Binary{\"sue\"}))), or(eq(id, 34), eq(id, 50)))"; + assertEquals(p.toString(), expected); + PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral()); + assertEquals(12L, leaf.getOrcLiteral()); leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); assertEquals("john", leaf.getLiteralList().get(0)); assertEquals("sue", leaf.getLiteralList().get(1)); leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); assertEquals(34L, leaf.getLiteralList().get(0)); @@ -1901,7 +1925,12 @@ public void testExpression5() throws Exception { List leaves = sarg.getLeaves(); assertEquals(1, leaves.size()); - assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getType()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, " + + "Binary{\"david\"})))"; + assertEquals(p.toString(), expected); + + 
assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getOrcType()); assertEquals(PredicateLeaf.Operator.BETWEEN, leaves.get(0).getOperator()); assertEquals("first_name", leaves.get(0).getColumnName()); @@ -2378,59 +2407,81 @@ public void testExpression7() throws Exception { List leaves = sarg.getLeaves(); assertEquals(9, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and" + + "(and(and(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), and(and(and(lt(id, 18), " + + "lt(id, 11)), lt(id, 13)), lt(id, 16))), " + + "and(and(and(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 16))), and(and(and(lt(id, 18), " + + "lt(id, 10)), lt(id, 14)), lt(id, 16))), and(and(and(lt(id, 18), " + + "lt(id, 11)), lt(id, 14)), lt(id, 16))), and(and(and(lt(id, 18), " + + "lt(id, 12)), lt(id, 14)), lt(id, 16))), and(and(and(lt(id, 18), " + + "lt(id, 10)), lt(id, 15)), lt(id, 16))), and(and(and(lt(id, 18), " + + "lt(id, 11)), lt(id, 15)), lt(id, 16))), and(and(and(lt(id, 18), " + + "lt(id, 12)), lt(id, 15)), lt(id, 16))), and(and(and(lt(id, 18), " + + "lt(id, 10)), lt(id, 13)), lt(id, 17))), and(and(and(lt(id, 18), " + + "lt(id, 11)), lt(id, 13)), lt(id, 17))), and(and(and(lt(id, 18), " + + "lt(id, 12)), lt(id, 13)), lt(id, 17))), and(and(and(lt(id, 18), " + + "lt(id, 10)), lt(id, 14)), lt(id, 17))), and(and(and(lt(id, 18), " + + "lt(id, 11)), lt(id, 14)), lt(id, 17))), and(and(and(lt(id, 18), " + + "lt(id, 12)), lt(id, 14)), lt(id, 17))), and(and(and(lt(id, 18), " + + "lt(id, 10)), lt(id, 15)), lt(id, 17))), and(and(and(lt(id, 18), " + + "lt(id, 11)), lt(id, 15)), lt(id, 17))), and(and(and(lt(id, 18), " + + "lt(id, 12)), lt(id, 15)), lt(id, 17)))"; + assertEquals(p.toString(), expected); + PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); 
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(18L, leaf.getLiteral()); + assertEquals(18L, leaf.getOrcLiteral()); leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(10L, leaf.getLiteral()); + assertEquals(10L, leaf.getOrcLiteral()); leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(13L, leaf.getLiteral()); + assertEquals(13L, leaf.getOrcLiteral()); leaf = leaves.get(3); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(16L, leaf.getLiteral()); + assertEquals(16L, leaf.getOrcLiteral()); leaf = leaves.get(4); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(11L, leaf.getLiteral()); + assertEquals(11L, leaf.getOrcLiteral()); leaf = leaves.get(5); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral()); + assertEquals(12L, leaf.getOrcLiteral()); leaf = leaves.get(6); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + 
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(14L, leaf.getLiteral()); + assertEquals(14L, leaf.getOrcLiteral()); leaf = leaves.get(7); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(15L, leaf.getLiteral()); + assertEquals(15L, leaf.getOrcLiteral()); leaf = leaves.get(8); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(17L, leaf.getLiteral()); + assertEquals(17L, leaf.getOrcLiteral()); assertEquals("(and" + " (or leaf-0 leaf-1 leaf-2 leaf-3)" + @@ -2512,6 +2563,9 @@ public void testExpression8() throws Exception { List leaves = sarg.getLeaves(); assertEquals(0, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + assertNull(p); + assertEquals("YES_NO_NULL", sarg.getExpression().toString()); } @@ -2648,115 +2702,115 @@ public void testExpression9() throws Exception { public void testExpression10() throws Exception { /* id >= 10 and not (10 > id) */ String exprStr = " \n" + - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " id \n"+ - " \n"+ - " \n"+ - " orc_people \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " int \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " 10 \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " boolean \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ 
- " \n"+ - " id \n"+ - " \n"+ - " \n"+ - " orc_people \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " 10 \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 10 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 10 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + ""; SearchArgumentImpl sarg = @@ -2764,11 +2818,15 @@ public void testExpression10() throws Exception { List leaves = sarg.getLeaves(); assertEquals(1, leaves.size()); - assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(not(lt(id, 10)), not(lt(id, 10)))"; + assertEquals(expected, p.toString()); + + 
assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getOrcType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaves.get(0).getOperator()); assertEquals("id", leaves.get(0).getColumnName()); - assertEquals(10L, leaves.get(0).getLiteral()); + assertEquals(10L, leaves.get(0).getOrcLiteral()); assertEquals("(and (not leaf-0) (not leaf-0))", sarg.getExpression().toString()); @@ -2792,9 +2850,9 @@ public void testBuilder() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("x", 10) - .lessThanEquals("y", "hi") - .equals("z", 1.0) + .lessThan("x", 10) + .lessThanEquals("y", "hi") + .equals("z", 1.0) .end() .build(); assertEquals("leaf-0 = (LESS_THAN x 10)\n" + @@ -2803,12 +2861,12 @@ public void testBuilder() throws Exception { "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString()); sarg = SearchArgumentFactory.newBuilder() .startNot() - .startOr() - .isNull("x") - .between("y", 10, 20) - .in("z", 1, 2, 3) - .nullSafeEquals("a", "stinger") - .end() + .startOr() + .isNull("x") + .between("y", 10, 20) + .in("z", 1, 2, 3) + .nullSafeEquals("a", "stinger") + .end() .end() .build(); assertEquals("leaf-0 = (IS_NULL x)\n" + @@ -2816,6 +2874,11 @@ public void testBuilder() throws Exception { "leaf-2 = (IN z 1 2 3)\n" + "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), " + + "not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); } @Test @@ -2823,9 +2886,9 @@ public void testBuilderComplexTypes() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("x", new DateWritable(10)) - .lessThanEquals("y", new HiveChar("hi", 10)) - .equals("z", HiveDecimal.create("1.0")) + .lessThan("x", new DateWritable(10)) 
+ .lessThanEquals("y", new HiveChar("hi", 10)) + .equals("z", HiveDecimal.create("1.0")) .end() .build(); assertEquals("leaf-0 = (LESS_THAN x 1970-01-11)\n" + @@ -2835,12 +2898,12 @@ public void testBuilderComplexTypes() throws Exception { sarg = SearchArgumentFactory.newBuilder() .startNot() - .startOr() - .isNull("x") - .between("y", HiveDecimal.create(10), 20.0) - .in("z", (byte)1, (short)2, (int)3) - .nullSafeEquals("a", new HiveVarchar("stinger", 100)) - .end() + .startOr() + .isNull("x") + .between("y", HiveDecimal.create(10), 20.0) + .in("z", (byte) 1, (short) 2, (int) 3) + .nullSafeEquals("a", new HiveVarchar("stinger", 100)) + .end() .end() .build(); assertEquals("leaf-0 = (IS_NULL x)\n" + @@ -2848,6 +2911,11 @@ public void testBuilderComplexTypes() throws Exception { "leaf-2 = (IN z 1 2 3)\n" + "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + + "not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); } @Test @@ -2870,7 +2938,7 @@ public void testBuilderComplexTypes2() throws Exception { .startOr() .isNull("x") .between("y", new BigDecimal(10), 20.0) - .in("z", (byte)1, (short)2, (int)3) + .in("z", (byte) 1, (short) 2, (int) 3) .nullSafeEquals("a", new HiveVarchar("stinger", 100)) .end() .end() @@ -2880,6 +2948,11 @@ public void testBuilderComplexTypes2() throws Exception { "leaf-2 = (IN z 1 2 3)\n" + "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + + "not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); } @Test @@ -2900,5 +2973,10 @@ public void testBuilderFloat() throws 
Exception { "leaf-3 = (EQUALS z 0.22)\n" + "leaf-4 = (EQUALS z1 0.22)\n" + "expr = (and leaf-0 leaf-1 leaf-2 leaf-3 leaf-4)", sarg.toString()); + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)), lteq(y, Binary{\"hi\"})), eq(z, " + + "0.22)), eq(z1, 0.22))"; + assertEquals(expected, p.toString()); } } diff --git serde/pom.xml serde/pom.xml index 98e5506..8c60b30 100644 --- serde/pom.xml +++ serde/pom.xml @@ -75,6 +75,11 @@ opencsv ${opencsv.version} + + com.twitter + parquet-hadoop-bundle + ${parquet.version} + diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java index 616c6db..0076220 100644 --- serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java +++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java @@ -43,7 +43,8 @@ * The possible types for sargs. */ public static enum Type { - INTEGER, // all of the integer types + INTEGER, // all of the integer types except long + LONG, FLOAT, // float and double STRING, // string, char, varchar DATE, @@ -58,9 +59,14 @@ public Operator getOperator(); /** - * Get the type of the column and literal. + * Get the type of the column and literal for ORC. */ - public Type getType(); + public Type getOrcType(); + + /** + * Get the type of the column and literal for Parquet. + */ + public Type getParquetType(); /** * Get the simple column name. @@ -69,14 +75,26 @@ public String getColumnName(); /** - * Get the literal half of the predicate leaf. + * Get the literal half of the predicate leaf. Adapt the original type to what ORC needs. * @return a Long, Double, or String */ - public Object getLiteral(); + public Object getOrcLiteral(); + + /** + * Get the literal half of the predicate leaf.
Adapt the original type to what Parquet needs. + * @return an Integer, Long, Double, or String + */ + public Object getParquetLiteral(); /** * For operators with multiple literals (IN and BETWEEN), get the literals. * @return the list of literals (Longs, Doubles, or Strings) */ public List getLiteralList(); + + /** + * For operators with multiple literals (IN and BETWEEN), get the literals adapted to what Parquet needs. + * @return the list of literals (Integers, Longs, Doubles, or Strings) + */ + public List getParquetLiteralList(); } diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java index db0f014..3642fdb 100644 --- serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java +++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.io.sarg; +import parquet.filter2.predicate.FilterPredicate; + import java.util.List; /** @@ -175,6 +177,12 @@ public boolean isNeeded() { public String toKryo(); /** + * Translate the search argument into the filter predicate used by Parquet. + * @return the Parquet filter predicate; may be null (e.g. when the search argument has no leaves) + */ + public FilterPredicate toFilterPredicate(); + + /** * A builder object for contexts outside of Hive where it isn't easy to * get a ExprNodeDesc. The user must call startOr, startAnd, or startNot * before adding any leaves.