commit 1675eedaefaf9062aaa0ddc7d369141d8ffc5683 Author: Owen O'Malley Date: Wed Jul 1 15:59:31 2015 -0700 HIVE-10799. Refactor SearchArgumentFactory to remove the dependence on ExprNodeGenericFuncDesc. diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java index a8215f2..f14fc2d 100644 --- common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java +++ common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java @@ -75,7 +75,7 @@ public static HiveDecimal create(BigInteger unscaled, int scale) { public static HiveDecimal create(String dec) { BigDecimal bd; try { - bd = new BigDecimal(dec); + bd = new BigDecimal(dec.trim()); } catch (NumberFormatException ex) { return null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 3a9e64e..4e6dd7a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -54,10 +54,10 @@ import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader; +import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -305,7 +305,7 @@ static void setSearchArgument(Reader.Options options, options.searchArgument(null, null); return; } - SearchArgument sarg = SearchArgumentFactory.createFromConf(conf); + SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf); if (sarg == null) { LOG.debug("No ORC pushdown predicate"); options.searchArgument(null, null); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 4f79e37..f85420d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -47,8 +47,8 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim; import org.apache.hadoop.io.Text; @@ -523,7 +523,8 @@ private static TruthValue checkInBloomFilter(BloomFilterIO bf, Object predObj, b result = TruthValue.YES_NO_NULL; } } else if (predObj instanceof String || predObj instanceof Text || - predObj instanceof HiveDecimal || predObj instanceof BigDecimal) { + predObj instanceof HiveDecimalWritable || + predObj instanceof BigDecimal) { if (bf.testString(predObj.toString())) { result = TruthValue.YES_NO_NULL; } @@ -560,11 +561,7 @@ private static TruthValue checkInBloomFilter(BloomFilterIO bf, Object predObj, b } private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object obj) { - if (obj != null) { - if (obj instanceof ExprNodeConstantDesc) { - obj = ((ExprNodeConstantDesc) obj).getValue(); - } - } else { + if (obj == null) { return null; } switch (type) { @@ -588,20 +585,23 @@ private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object break; case DECIMAL: if (obj instanceof Boolean) { - return ((Boolean) obj).booleanValue() ? HiveDecimal.ONE : HiveDecimal.ZERO; + return new HiveDecimalWritable(((Boolean) obj).booleanValue() ? + HiveDecimal.ONE : HiveDecimal.ZERO); } else if (obj instanceof Integer) { - return HiveDecimal.create(((Integer) obj).intValue()); + return new HiveDecimalWritable(((Integer) obj).intValue()); } else if (obj instanceof Long) { - return HiveDecimal.create(((Long) obj)); + return new HiveDecimalWritable(((Long) obj)); } else if (obj instanceof Float || obj instanceof Double || obj instanceof String) { - return HiveDecimal.create(obj.toString()); + return new HiveDecimalWritable(obj.toString()); } else if (obj instanceof BigDecimal) { - return HiveDecimal.create((BigDecimal) obj); + return new HiveDecimalWritable(HiveDecimal.create((BigDecimal) obj)); } else if (obj instanceof HiveDecimal) { + return new HiveDecimalWritable((HiveDecimal) obj); + } else if (obj instanceof HiveDecimalWritable) { return obj; } else if (obj instanceof Timestamp) { - return HiveDecimal.create( + return new HiveDecimalWritable( new Double(new TimestampWritable((Timestamp) obj).getDouble()).toString()); } break; @@ -641,12 +641,16 @@ private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object case TIMESTAMP: if (obj instanceof Timestamp) { return obj; + } else if (obj instanceof Integer) { + return TimestampWritable.longToTimestamp(((Number) obj).longValue(), false); } else if (obj instanceof Float) { return TimestampWritable.doubleToTimestamp(((Float) obj).doubleValue()); } else if (obj instanceof Double) { return TimestampWritable.doubleToTimestamp(((Double) obj).doubleValue()); } else if (obj instanceof HiveDecimal) { return TimestampWritable.decimalToTimestamp((HiveDecimal) obj); + } else if (obj instanceof HiveDecimalWritable) { + return TimestampWritable.decimalToTimestamp(((HiveDecimalWritable) obj).getHiveDecimal()); } else if (obj instanceof Date) { return new Timestamp(((Date) obj).getTime()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java index a64ec06..49e52da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java @@ -27,10 +27,10 @@ import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder; import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory; import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher; +import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.io.ArrayWritable; @@ -149,7 +149,7 @@ public ParquetRecordReaderWrapper( } SearchArgument sarg = - SearchArgumentFactory.create(Utilities.deserializeExpression + ConvertAstToSearchArg.create(Utilities.deserializeExpression (serializedPushdown)); FilterPredicate p = toFilterPredicate(sarg); if (p != null) { @@ -316,8 +316,7 @@ protected ParquetInputSplit getSplit( * @return translate the sarg into a filter predicate */ public static FilterPredicate toFilterPredicate(SearchArgument sarg) { - return translate(sarg.getExpression(), - sarg.getLeaves()); + return translate(sarg.getExpression(), sarg.getLeaves()); } private static boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java new file mode 100644 index 0000000..2e19713 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -0,0 +1,438 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.sarg; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; + +public class ConvertAstToSearchArg { + private static final Log LOG = LogFactory.getLog(ConvertAstToSearchArg.class); + private final SearchArgument.Builder builder = + SearchArgumentFactory.newBuilder(); + + /** + * Builds the expression and leaf list from the original predicate. + * @param expression the expression to translate. + */ + ConvertAstToSearchArg(ExprNodeGenericFuncDesc expression) { + parse(expression); + } + + /** + * Build the search argument from the expression. + * @return the search argument + */ + public SearchArgument buildSearchArgument() { + return builder.build(); + } + + /** + * Get the type of the given expression node. + * @param expr the expression to get the type of + * @return int, string, or float or null if we don't know the type + */ + private static PredicateLeaf.Type getType(ExprNodeDesc expr) { + TypeInfo type = expr.getTypeInfo(); + if (type.getCategory() == ObjectInspector.Category.PRIMITIVE) { + switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + return PredicateLeaf.Type.INTEGER; + case LONG: + return PredicateLeaf.Type.LONG; + case CHAR: + case VARCHAR: + case STRING: + return PredicateLeaf.Type.STRING; + case FLOAT: + case DOUBLE: + return PredicateLeaf.Type.FLOAT; + case DATE: + return PredicateLeaf.Type.DATE; + case TIMESTAMP: + return PredicateLeaf.Type.TIMESTAMP; + case DECIMAL: + return PredicateLeaf.Type.DECIMAL; + case BOOLEAN: + return PredicateLeaf.Type.BOOLEAN; + default: + } + } + return null; + } + + /** + * Get the column name referenced in the expression. It must be at the top + * level of this expression and there must be exactly one column. + * @param expr the expression to look in + * @param variable the slot the variable is expected in + * @return the column name or null if there isn't exactly one column + */ + private static String getColumnName(ExprNodeGenericFuncDesc expr, + int variable) { + List children = expr.getChildren(); + if (variable < 0 || variable >= children.size()) { + return null; + } + ExprNodeDesc child = children.get(variable); + if (child instanceof ExprNodeColumnDesc) { + return ((ExprNodeColumnDesc) child).getColumn(); + } + return null; + } + + private static Object boxLiteral(ExprNodeConstantDesc constantDesc, + PredicateLeaf.Type type) { + Object lit = constantDesc.getValue(); + if (lit == null) { + return null; + } + switch (type) { + case INTEGER: + return ((Number) lit).intValue(); + case LONG: + return ((Number) lit).longValue(); + case STRING: + if (lit instanceof HiveChar) { + lit = ((HiveChar) lit).getPaddedValue(); + } else if (lit instanceof String) { + return lit; + } else { + return lit.toString(); + } + case FLOAT: + if (lit instanceof Float) { + // converting a float directly to a double causes annoying conversion + // problems + return Double.parseDouble(lit.toString()); + } else { + return ((Number) lit).doubleValue(); + } + case TIMESTAMP: + return Timestamp.valueOf(lit.toString()); + case DATE: + return Date.valueOf(lit.toString()); + case DECIMAL: + return new HiveDecimalWritable(lit.toString()); + case BOOLEAN: + return lit; + default: + throw new IllegalArgumentException("Unknown literal " + type); + } + } + + /** + * Find the child that is the literal. + * @param expr the parent node to check + * @param type the type of the expression + * @return the literal boxed if found or null + */ + private static Object findLiteral(ExprNodeGenericFuncDesc expr, + PredicateLeaf.Type type) { + List children = expr.getChildren(); + if (children.size() != 2) { + return null; + } + Object result = null; + for(ExprNodeDesc child: children) { + if (child instanceof ExprNodeConstantDesc) { + if (result != null) { + return null; + } + result = boxLiteral((ExprNodeConstantDesc) child, type); + } + } + return result; + } + + /** + * Return the boxed literal at the given position + * @param expr the parent node + * @param type the type of the expression + * @param position the child position to check + * @return the boxed literal if found otherwise null + */ + private static Object getLiteral(ExprNodeGenericFuncDesc expr, + PredicateLeaf.Type type, + int position) { + List children = expr.getChildren(); + Object child = children.get(position); + if (child instanceof ExprNodeConstantDesc) { + return boxLiteral((ExprNodeConstantDesc) child, type); + } + return null; + } + + private static Object[] getLiteralList(ExprNodeGenericFuncDesc expr, + PredicateLeaf.Type type, + int start) { + List children = expr.getChildren(); + Object[] result = new Object[children.size() - start]; + + // ignore the first child, since it is the variable + int posn = 0; + for(ExprNodeDesc child: children.subList(start, children.size())) { + if (child instanceof ExprNodeConstantDesc) { + result[posn++] = boxLiteral((ExprNodeConstantDesc) child, type); + } else { + // if we get some non-literals, we need to punt + return null; + } + } + return result; + } + + private void createLeaf(PredicateLeaf.Operator operator, + ExprNodeGenericFuncDesc expression, + int variable) { + String columnName = getColumnName(expression, variable); + if (columnName == null) { + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + return; + } + PredicateLeaf.Type type = getType(expression.getChildren().get(variable)); + if (type == null) { + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + return; + } + + // if the variable was on the right, we need to swap things around + boolean needSwap = false; + if (variable != 0) { + if (operator == PredicateLeaf.Operator.LESS_THAN) { + needSwap = true; + operator = PredicateLeaf.Operator.LESS_THAN_EQUALS; + } else if (operator == PredicateLeaf.Operator.LESS_THAN_EQUALS) { + needSwap = true; + operator = PredicateLeaf.Operator.LESS_THAN; + } + } + if (needSwap) { + builder.startNot(); + } + + switch (operator) { + case IS_NULL: + builder.isNull(columnName, type); + break; + case EQUALS: + builder.equals(columnName, type, findLiteral(expression, type)); + break; + case NULL_SAFE_EQUALS: + builder.nullSafeEquals(columnName, type, findLiteral(expression, type)); + break; + case LESS_THAN: + builder.lessThan(columnName, type, findLiteral(expression, type)); + break; + case LESS_THAN_EQUALS: + builder.lessThanEquals(columnName, type, findLiteral(expression, type)); + break; + case IN: + builder.in(columnName, type, + getLiteralList(expression, type, variable + 1)); + break; + case BETWEEN: + builder.between(columnName, type, + getLiteral(expression, type, variable + 1), + getLiteral(expression, type, variable + 2)); + break; + } + + if (needSwap) { + builder.end(); + } + } + + /** + * Find the variable in the expression. + * @param expr the expression to look in + * @return the index of the variable or -1 if there is not exactly one + * variable. + */ + private int findVariable(ExprNodeDesc expr) { + int result = -1; + List children = expr.getChildren(); + for(int i = 0; i < children.size(); ++i) { + ExprNodeDesc child = children.get(i); + if (child instanceof ExprNodeColumnDesc) { + // if we already found a variable, this isn't a sarg + if (result != -1) { + return -1; + } else { + result = i; + } + } + } + return result; + } + + /** + * Create a leaf expression when we aren't sure where the variable is + * located. + * @param operator the operator type that was found + * @param expression the expression to check + */ + private void createLeaf(PredicateLeaf.Operator operator, + ExprNodeGenericFuncDesc expression) { + createLeaf(operator, expression, findVariable(expression)); + } + + private void addChildren(ExprNodeGenericFuncDesc node) { + for(ExprNodeDesc child: node.getChildren()) { + parse(child); + } + } + + /** + * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree. + * @param expression the Hive ExprNodeDesc + */ + private void parse(ExprNodeDesc expression) { + // Most of the stuff we can handle are generic function descriptions, so + // handle the special cases. + if (expression.getClass() != ExprNodeGenericFuncDesc.class) { + + // if it is a reference to a boolean column, covert it to a truth test. + if (expression instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) expression; + if (columnDesc.getTypeString().equals("boolean")) { + builder.equals(columnDesc.getColumn(), PredicateLeaf.Type.BOOLEAN, + true); + return; + } + } + + // otherwise, we don't know what to do so make it a maybe + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + return; + } + + // get the kind of expression + ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression; + Class op = expr.getGenericUDF().getClass(); + + // handle the logical operators + if (op == GenericUDFOPOr.class) { + builder.startOr(); + addChildren(expr); + builder.end(); + } else if (op == GenericUDFOPAnd.class) { + builder.startAnd(); + addChildren(expr); + builder.end(); + } else if (op == GenericUDFOPNot.class) { + builder.startNot(); + addChildren(expr); + builder.end(); + } else if (op == GenericUDFOPEqual.class) { + createLeaf(PredicateLeaf.Operator.EQUALS, expr); + } else if (op == GenericUDFOPNotEqual.class) { + builder.startNot(); + createLeaf(PredicateLeaf.Operator.EQUALS, expr); + builder.end(); + } else if (op == GenericUDFOPEqualNS.class) { + createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr); + } else if (op == GenericUDFOPGreaterThan.class) { + builder.startNot(); + createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr); + builder.end(); + } else if (op == GenericUDFOPEqualOrGreaterThan.class) { + builder.startNot(); + createLeaf(PredicateLeaf.Operator.LESS_THAN, expr); + builder.end(); + } else if (op == GenericUDFOPLessThan.class) { + createLeaf(PredicateLeaf.Operator.LESS_THAN, expr); + } else if (op == GenericUDFOPEqualOrLessThan.class) { + createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr); + } else if (op == GenericUDFIn.class) { + createLeaf(PredicateLeaf.Operator.IN, expr, 0); + } else if (op == GenericUDFBetween.class) { + createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1); + } else if (op == GenericUDFOPNull.class) { + createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0); + } else if (op == GenericUDFOPNotNull.class) { + builder.startNot(); + createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0); + builder.end(); + + // otherwise, we didn't understand it, so mark it maybe + } else { + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + } + } + + + public static final String SARG_PUSHDOWN = "sarg.pushdown"; + + public static SearchArgument create(ExprNodeGenericFuncDesc expression) { + return new ConvertAstToSearchArg(expression).buildSearchArgument(); + } + + + public static SearchArgument create(String kryo) { + Input input = new Input(Base64.decodeBase64(kryo)); + return new Kryo().readObject(input, SearchArgumentImpl.class); + } + + public static SearchArgument createFromConf(Configuration conf) { + String sargString; + if ((sargString = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR)) != null) { + return create(Utilities.deserializeExpression(sargString)); + } else if ((sargString = conf.get(SARG_PUSHDOWN)) != null) { + return create(sargString); + } + return null; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java index c75e820..6ad927d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.io.sarg; +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; @@ -30,27 +33,7 @@ * A factory for creating SearchArguments. */ public class SearchArgumentFactory { - public static final String SARG_PUSHDOWN = "sarg.pushdown"; - - public static SearchArgument create(ExprNodeGenericFuncDesc expression) { - return new SearchArgumentImpl(expression); - } - public static Builder newBuilder() { - return SearchArgumentImpl.newBuilder(); - } - - public static SearchArgument create(String kryo) { - return SearchArgumentImpl.fromKryo(kryo); - } - - public static SearchArgument createFromConf(Configuration conf) { - String sargString = null; - if ((sargString = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR)) != null) { - return create(Utilities.deserializeExpression(sargString)); - } else if ((sargString = conf.get(SARG_PUSHDOWN)) != null) { - return create(sargString); - } - return null; + return new SearchArgumentImpl.BuilderImpl(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index 46f1e4e..1582a75 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -18,47 +18,20 @@ package org.apache.hadoop.hive.ql.io.sarg; -import java.math.BigDecimal; import java.sql.Timestamp; import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Arrays; import java.util.Deque; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.codec.binary.Base64; -import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; /** @@ -74,6 +47,8 @@ private final Object literal; private final List literalList; + // Used by kryo + @SuppressWarnings("unused") PredicateLeafImpl() { operator = null; type = null; @@ -91,7 +66,24 @@ this.type = type; this.columnName = columnName; this.literal = literal; + if (literal != null) { + if (literal.getClass() != type.getValueClass()) { + throw new IllegalArgumentException("Wrong value class " + + literal.getClass().getName() + " for " + type + "." + operator + + " leaf"); + } + } this.literalList = literalList; + if (literalList != null) { + Class valueCls = type.getValueClass(); + for(Object lit: literalList) { + if (lit != null && lit.getClass() != valueCls) { + throw new IllegalArgumentException("Wrong value class item " + + lit.getClass().getName() + " for " + type + "." + operator + + " leaf"); + } + } + } } @Override @@ -138,7 +130,7 @@ public String toString() { } else if (literalList != null) { for(Object lit: literalList) { buffer.append(' '); - buffer.append(lit.toString()); + buffer.append(lit == null ? "null" : lit.toString()); } } buffer.append(')'); @@ -146,13 +138,9 @@ public String toString() { } private static boolean isEqual(Object left, Object right) { - if (left == right) { - return true; - } else if (left == null || right == null) { - return false; - } else { - return left.equals(right); - } + + return left == right || + (left != null && right != null && left.equals(right)); } @Override @@ -182,286 +170,315 @@ public int hashCode() { } } - static class ExpressionBuilder { - // max threshold for CNF conversion. having >8 elements in andList will be converted to maybe + + private final List leaves; + private final ExpressionTree expression; + + SearchArgumentImpl(ExpressionTree expression, List leaves) { + this.expression = expression; + this.leaves = leaves; + } + + // Used by kyro + @SuppressWarnings("unused") + SearchArgumentImpl() { + leaves = null; + expression = null; + } + + @Override + public List getLeaves() { + return leaves; + } + + @Override + public TruthValue evaluate(TruthValue[] leaves) { + return expression == null ? TruthValue.YES : expression.evaluate(leaves); + } + + @Override + public ExpressionTree getExpression() { + return expression; + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + for(int i=0; i < leaves.size(); ++i) { + buffer.append("leaf-"); + buffer.append(i); + buffer.append(" = "); + buffer.append(leaves.get(i).toString()); + buffer.append('\n'); + } + buffer.append("expr = "); + buffer.append(expression); + return buffer.toString(); + } + + public String toKryo() { + Output out = new Output(4 * 1024, 10 * 1024 * 1024); + new Kryo().writeObject(out, this); + out.close(); + return Base64.encodeBase64String(out.toBytes()); + } + + static class BuilderImpl implements Builder { + + // max threshold for CNF conversion. having >8 elements in andList will be + // converted to maybe private static final int CNF_COMBINATIONS_THRESHOLD = 256; - private final List leaves = new ArrayList(); - /** - * Get the type of the given expression node. - * @param expr the expression to get the type of - * @return int, string, or float or null if we don't know the type - */ - private static PredicateLeaf.Type getType(ExprNodeDesc expr) { - TypeInfo type = expr.getTypeInfo(); - if (type.getCategory() == ObjectInspector.Category.PRIMITIVE) { - switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - return PredicateLeaf.Type.INTEGER; - case LONG: - return PredicateLeaf.Type.LONG; - case CHAR: - case VARCHAR: - case STRING: - return PredicateLeaf.Type.STRING; - case FLOAT: - case DOUBLE: - return PredicateLeaf.Type.FLOAT; - case DATE: - return PredicateLeaf.Type.DATE; - case TIMESTAMP: - return PredicateLeaf.Type.TIMESTAMP; - case DECIMAL: - return PredicateLeaf.Type.DECIMAL; - case BOOLEAN: - return PredicateLeaf.Type.BOOLEAN; - default: - } - } - return null; + private final Deque currentTree = + new ArrayDeque(); + private final Map leaves = + new HashMap(); + private final ExpressionTree root = + new ExpressionTree(ExpressionTree.Operator.AND); + { + currentTree.add(root); } - /** - * Get the column name referenced in the expression. It must be at the top - * level of this expression and there must be exactly one column. - * @param expr the expression to look in - * @param variable the slot the variable is expected in - * @return the column name or null if there isn't exactly one column - */ - private static String getColumnName(ExprNodeGenericFuncDesc expr, - int variable) { - List children = expr.getChildren(); - if (variable < 0 || variable >= children.size()) { - return null; + @Override + public Builder startOr() { + ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR); + currentTree.getFirst().getChildren().add(node); + currentTree.addFirst(node); + return this; + } + + @Override + public Builder startAnd() { + ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND); + currentTree.getFirst().getChildren().add(node); + currentTree.addFirst(node); + return this; + } + + @Override + public Builder startNot() { + ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT); + currentTree.getFirst().getChildren().add(node); + currentTree.addFirst(node); + return this; + } + + @Override + public Builder end() { + ExpressionTree current = currentTree.removeFirst(); + if (current.getChildren().size() == 0) { + throw new IllegalArgumentException("Can't create expression " + root + + " with no children."); } - ExprNodeDesc child = children.get(variable); - if (child instanceof ExprNodeColumnDesc) { - return ((ExprNodeColumnDesc) child).getColumn(); + if (current.getOperator() == ExpressionTree.Operator.NOT && + current.getChildren().size() != 1) { + throw new IllegalArgumentException("Can't create not expression " + + current + " with more than 1 child."); } - return null; + return this; } - private static Object boxLiteral(ExprNodeConstantDesc lit) { - switch (getType(lit)) { - case INTEGER: - return ((Number) lit.getValue()).intValue(); - case LONG: - return ((Number) lit.getValue()).longValue(); - case STRING: - return StringUtils.stripEnd(lit.getValue().toString(), null); - case FLOAT: - return Double.parseDouble(lit.getValue().toString()); - case DATE: - case TIMESTAMP: - case DECIMAL: - case BOOLEAN: - return lit; - default: - throw new IllegalArgumentException("Unknown literal " + getType(lit)); + private int addLeaf(PredicateLeaf leaf) { + Integer result = leaves.get(leaf); + if (result == null) { + int id = leaves.size(); + leaves.put(leaf, id); + return id; + } else { + return result; } } - private static Object getLiteral(ExprNodeGenericFuncDesc expr) { - Object result = null; - List children = expr.getChildren(); - if (children.size() != 2) { - return null; - } - for(ExprNodeDesc child: children) { - if (child instanceof ExprNodeConstantDesc) { - if (result != null) { - return null; - } - result = boxLiteral((ExprNodeConstantDesc) child); - } + @Override + public Builder lessThan(String column, PredicateLeaf.Type type, + Object literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN, + type, column, literal, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - return result; + return this; } - private static List getLiteralList(ExprNodeGenericFuncDesc expr, - int start) { - List result = new ArrayList(); - List children = expr.getChildren(); - // ignore the first child, since it is the variable - for(ExprNodeDesc child: children.subList(start, children.size())) { - if (child instanceof ExprNodeConstantDesc) { - result.add(boxLiteral((ExprNodeConstantDesc) child)); - } else { - // if we get some non-literals, we need to punt - return null; - } + @Override + public Builder lessThanEquals(String column, PredicateLeaf.Type type, + Object literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS, + type, column, literal, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - return result; + return this; } - private ExpressionTree createLeaf(PredicateLeaf.Operator operator, - ExprNodeGenericFuncDesc expression, - List leafCache, - int variable) { - String columnName = getColumnName(expression, variable); - if (columnName == null) { - return new ExpressionTree(TruthValue.YES_NO_NULL); - } - PredicateLeaf.Type type = getType(expression.getChildren().get(variable)); - if (type == null) { - return new ExpressionTree(TruthValue.YES_NO_NULL); + @Override + public Builder equals(String column, PredicateLeaf.Type type, + Object literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS, + type, column, literal, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } + return this; + } - Object literal = null; - List literalList = null; - switch (operator) { - case IS_NULL: - break; - case IN: - case BETWEEN: - literalList = getLiteralList(expression, variable + 1); - if (literalList == null) { - return new ExpressionTree(TruthValue.YES_NO_NULL); - } - break; - default: - literal = getLiteral(expression); - if (literal == null) { - return new ExpressionTree(TruthValue.YES_NO_NULL); - } - break; + @Override + public Builder nullSafeEquals(String column, PredicateLeaf.Type type, + Object literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + type, column, literal, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - // if the variable was on the right, we need to swap things around - boolean needSwap = false; - if (variable != 0) { - if (operator == PredicateLeaf.Operator.LESS_THAN) { - needSwap = true; - operator = PredicateLeaf.Operator.LESS_THAN_EQUALS; - } else if (operator == PredicateLeaf.Operator.LESS_THAN_EQUALS) { - needSwap = true; - operator = PredicateLeaf.Operator.LESS_THAN; + return this; + } + + @Override + public Builder in(String column, PredicateLeaf.Type type, + Object... literal) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || literal == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + if (literal.length == 0) { + throw new IllegalArgumentException("Can't create in expression with " + + "no arguments"); } + List argList = new ArrayList(); + argList.addAll(Arrays.asList(literal)); + + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.IN, + type, column, null, argList); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - leafCache.add(new PredicateLeafImpl(operator, type, columnName, - literal, literalList)); - ExpressionTree result = new ExpressionTree(leafCache.size() - 1); - if (needSwap) { - result = negate(result); - } - return result; + return this; } - /** - * Find the variable in the expression. - * @param expr the expression to look in - * @return the index of the variable or -1 if there is not exactly one - * variable. - */ - private int findVariable(ExprNodeDesc expr) { - int result = -1; - List children = expr.getChildren(); - for(int i = 0; i < children.size(); ++i) { - ExprNodeDesc child = children.get(i); - if (child instanceof ExprNodeColumnDesc) { - // if we already found a variable, this isn't a sarg - if (result != -1) { - return -1; - } else { - result = i; - } - } + @Override + public Builder isNull(String column, PredicateLeaf.Type type) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL, + type, column, null, null); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } - return result; + return this; } - /** - * Create a leaf expression when we aren't sure where the variable is - * located. - * @param operator the operator type that was found - * @param expression the expression to check - * @param leafCache the list of leaves - * @return if the expression is a sarg, return it, otherwise null - */ - private ExpressionTree createLeaf(PredicateLeaf.Operator operator, - ExprNodeGenericFuncDesc expression, - List leafCache) { - return createLeaf(operator, expression, leafCache, - findVariable(expression)); + @Override + public Builder between(String column, PredicateLeaf.Type type, Object lower, + Object upper) { + ExpressionTree parent = currentTree.getFirst(); + if (column == null || lower == null || upper == null) { + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + } else { + List argList = new ArrayList(); + argList.add(lower); + argList.add(upper); + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN, + type, column, null, argList); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); + } + return this; } - private ExpressionTree negate(ExpressionTree expr) { - ExpressionTree result = new ExpressionTree(ExpressionTree.Operator.NOT); - result.getChildren().add(expr); - return result; + @Override + public Builder literal(TruthValue truth) { + ExpressionTree parent = currentTree.getFirst(); + parent.getChildren().add(new ExpressionTree(truth)); + return this; } - private void addChildren(ExpressionTree result, - ExprNodeGenericFuncDesc node, - List leafCache) { - for(ExprNodeDesc child: node.getChildren()) { - result.getChildren().add(parse(child, leafCache)); + /** + * Recursively explore the tree to find the leaves that are still reachable + * after optimizations. + * @param tree the node to check next + * @param next the next available leaf id + * @param leafReorder + * @return the next available leaf id + */ + static int compactLeaves(ExpressionTree tree, int next, int[] leafReorder) { + if (tree.getOperator() == ExpressionTree.Operator.LEAF) { + int oldLeaf = tree.getLeaf(); + if (leafReorder[oldLeaf] == -1) { + leafReorder[oldLeaf] = next++; + } + } else if (tree.getChildren() != null){ + for(ExpressionTree child: tree.getChildren()) { + next = compactLeaves(child, next, leafReorder); + } } + return next; } /** - * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree. - * @param expression the Hive ExprNodeDesc - * @return the non-normalized ExpressionTree + * Rewrite expression tree to update the leaves. + * @param root the root of the tree to fix + * @param leafReorder a map from old leaf ids to new leaf ids + * @return the fixed root */ - private ExpressionTree parse(ExprNodeDesc expression, - List leafCache) { - // if we don't know the expression, just assume maybe - if (expression.getClass() != ExprNodeGenericFuncDesc.class) { - return new ExpressionTree(TruthValue.YES_NO_NULL); + static ExpressionTree rewriteLeaves(ExpressionTree root, + int[] leafReorder) { + if (root.getOperator() == ExpressionTree.Operator.LEAF) { + return new ExpressionTree(leafReorder[root.getLeaf()]); + } else if (root.getChildren() != null){ + List children = root.getChildren(); + for(int i=0; i < children.size(); ++i) { + children.set(i, rewriteLeaves(children.get(i), leafReorder)); + } } - // get the kind of expression - ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression; - Class op = expr.getGenericUDF().getClass(); - ExpressionTree result; - - // handle the logical operators - if (op == GenericUDFOPOr.class) { - result = new ExpressionTree(ExpressionTree.Operator.OR); - addChildren(result, expr, leafCache); - } else if (op == GenericUDFOPAnd.class) { - result = new ExpressionTree(ExpressionTree.Operator.AND); - addChildren(result, expr, leafCache); - } else if (op == GenericUDFOPNot.class) { - result = new ExpressionTree(ExpressionTree.Operator.NOT); - addChildren(result, expr, leafCache); - } else if (op == GenericUDFOPEqual.class) { - result = createLeaf(PredicateLeaf.Operator.EQUALS, expr, leafCache); - } else if (op == GenericUDFOPNotEqual.class) { - result = negate(createLeaf(PredicateLeaf.Operator.EQUALS, expr, - leafCache)); - } else if (op == GenericUDFOPEqualNS.class) { - result = createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr, - leafCache); - } else if (op == GenericUDFOPGreaterThan.class) { - result = negate(createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, - expr, leafCache)); - } else if (op == GenericUDFOPEqualOrGreaterThan.class) { - result = negate(createLeaf(PredicateLeaf.Operator.LESS_THAN, expr, - leafCache)); - } else if (op == GenericUDFOPLessThan.class) { - result = createLeaf(PredicateLeaf.Operator.LESS_THAN, expr, leafCache); - } else if (op == GenericUDFOPEqualOrLessThan.class) { - result = createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr, - leafCache); - } else if (op == GenericUDFIn.class) { - result = createLeaf(PredicateLeaf.Operator.IN, expr, leafCache, 0); - } else if (op == GenericUDFBetween.class) { - result = createLeaf(PredicateLeaf.Operator.BETWEEN, expr, leafCache, - 1); - } else if (op == GenericUDFOPNull.class) { - result = createLeaf(PredicateLeaf.Operator.IS_NULL, expr, leafCache, - 0); - } else if (op == GenericUDFOPNotNull.class) { - result = negate(createLeaf(PredicateLeaf.Operator.IS_NULL, expr, - leafCache, 0)); + return root; + } - // otherwise, we didn't understand it, so mark it maybe - } else { - result = new ExpressionTree(TruthValue.YES_NO_NULL); + @Override + public SearchArgument build() { + if (currentTree.size() != 1) { + throw new IllegalArgumentException("Failed to end " + + currentTree.size() + " operations."); } - return result; + ExpressionTree optimized = pushDownNot(root); + optimized = foldMaybe(optimized); + optimized = flatten(optimized); + optimized = convertToCNF(optimized); + optimized = flatten(optimized); + int leafReorder[] = new int[leaves.size()]; + Arrays.fill(leafReorder, -1); + int newLeafCount = compactLeaves(optimized, 0, leafReorder); + optimized = rewriteLeaves(optimized, leafReorder); + ArrayList leafList = new ArrayList<>(newLeafCount); + // expand list to correct size + for(int i=0; i < newLeafCount; ++i) { + leafList.add(null); + } + // build the new list + for(Map.Entry elem: leaves.entrySet()) { + int newLoc = leafReorder[elem.getValue()]; + if (newLoc != -1) { + leafList.set(newLoc, elem.getKey()); + } + } + return new SearchArgumentImpl(optimized, leafList); } /** @@ -528,7 +545,7 @@ static ExpressionTree foldMaybe(ExpressionTree expr) { return child; default: throw new IllegalStateException("Got a maybe as child of " + - expr); + expr); } } else { expr.getChildren().set(i, child); @@ -542,6 +559,45 @@ static ExpressionTree foldMaybe(ExpressionTree expr) { } /** + * Converts multi-level ands and ors into single level ones. + * @param root the expression to flatten + * @return the flattened expression, which will always be root with + * potentially modified children. + */ + static ExpressionTree flatten(ExpressionTree root) { + if (root.getChildren() != null) { + // iterate through the index, so that if we add more children, + // they don't get re-visited + for(int i=0; i < root.getChildren().size(); ++i) { + ExpressionTree child = flatten(root.getChildren().get(i)); + // do we need to flatten? + if (child.getOperator() == root.getOperator() && + child.getOperator() != ExpressionTree.Operator.NOT) { + boolean first = true; + for(ExpressionTree grandkid: child.getChildren()) { + // for the first grandkid replace the original parent + if (first) { + first = false; + root.getChildren().set(i, grandkid); + } else { + root.getChildren().add(++i, grandkid); + } + } + } else { + root.getChildren().set(i, child); + } + } + // if we have a singleton AND or OR, just return the child + if ((root.getOperator() == ExpressionTree.Operator.OR || + root.getOperator() == ExpressionTree.Operator.AND) && + root.getChildren().size() == 1) { + return root.getChildren().get(0); + } + } + return root; + } + + /** * Generate all combinations of items on the andList. For each item on the * andList, it generates all combinations of one child from each and * expression. Thus, (and a b) (and c d) will be expanded to: (or a c) @@ -554,7 +610,7 @@ static ExpressionTree foldMaybe(ExpressionTree expr) { private static void generateAllCombinations(List result, List andList, List nonAndList - ) { + ) { List kids = andList.get(0).getChildren(); if (result.isEmpty()) { for(ExpressionTree kid: kids) { @@ -637,391 +693,5 @@ private static boolean checkCombinationsThreshold(List andList) return true; } - /** - * Converts multi-level ands and ors into single level ones. - * @param root the expression to flatten - * @return the flattened expression, which will always be root with - * potentially modified children. - */ - static ExpressionTree flatten(ExpressionTree root) { - if (root.getChildren() != null) { - // iterate through the index, so that if we add more children, - // they don't get re-visited - for(int i=0; i < root.getChildren().size(); ++i) { - ExpressionTree child = flatten(root.getChildren().get(i)); - // do we need to flatten? - if (child.getOperator() == root.getOperator() && - child.getOperator() != ExpressionTree.Operator.NOT) { - boolean first = true; - for(ExpressionTree grandkid: child.getChildren()) { - // for the first grandkid replace the original parent - if (first) { - first = false; - root.getChildren().set(i, grandkid); - } else { - root.getChildren().add(++i, grandkid); - } - } - } else { - root.getChildren().set(i, child); - } - } - // if we have a singleton AND or OR, just return the child - if ((root.getOperator() == ExpressionTree.Operator.OR || - root.getOperator() == ExpressionTree.Operator.AND) && - root.getChildren().size() == 1) { - return root.getChildren().get(0); - } - } - return root; - } - - /** - * Iterates through the expression, finding all of the leaves. It creates - * the leaves list with each unique leaf that is found in the expression. - * The expression is updated with the new leaf ids for each leaf. - * @param expr the expression to find the leaves in - * @param leafCache the list of all of the leaves - * @param lookup a map that is used to uniquify the leaves - * @return The potentially modified expression - */ - private ExpressionTree buildLeafList(ExpressionTree expr, - List leafCache, - Map lookup) { - if (expr.getChildren() != null) { - for(int i=0; i < expr.getChildren().size(); ++i) { - expr.getChildren().set(i, buildLeafList(expr.getChildren().get(i), - leafCache, lookup)); - } - } else if (expr.getOperator() == ExpressionTree.Operator.LEAF) { - PredicateLeaf leaf = leafCache.get(expr.getLeaf()); - ExpressionTree val = lookup.get(leaf); - if (val == null) { - val = new ExpressionTree(leaves.size()); - lookup.put(leaf, val); - leaves.add(leaf); - } - return val; - } - return expr; - } - - /** - * Builds the expression and leaf list from the original predicate. - * @param expression the expression to translate - * @return The normalized expression. - */ - ExpressionTree expression(ExprNodeGenericFuncDesc expression) { - List leafCache = new ArrayList(); - ExpressionTree expr = parse(expression, leafCache); - return expression(expr, leafCache); - } - - /** - * Builds the expression and optimized leaf list from a non-normalized - * expression. Sets the leaves field with the unique leaves. - * @param expr non-normalized expression - * @param leaves non-unique leaves - * @return the normalized expression - */ - ExpressionTree expression(ExpressionTree expr, - List leaves) { - expr = pushDownNot(expr); - expr = foldMaybe(expr); - expr = flatten(expr); - expr = convertToCNF(expr); - expr = flatten(expr); - expr = buildLeafList(expr, leaves, - new HashMap()); - return expr; - } - - List getLeaves() { - return leaves; - } - } - - private final List leaves; - private final ExpressionTree expression; - - SearchArgumentImpl(ExprNodeGenericFuncDesc expr) { - if (expr == null) { - leaves = new ArrayList(); - expression = null; - } else { - ExpressionBuilder builder = new ExpressionBuilder(); - expression = builder.expression(expr); - leaves = builder.getLeaves(); - } - } - - SearchArgumentImpl() { - leaves = null; - expression = null; - } - - SearchArgumentImpl(ExpressionTree expression, List leaves) { - this.expression = expression; - this.leaves = leaves; - } - - @Override - public List getLeaves() { - return leaves; - } - - @Override - public TruthValue evaluate(TruthValue[] leaves) { - return expression == null ? TruthValue.YES : expression.evaluate(leaves); - } - - @Override - public ExpressionTree getExpression() { - return expression; - } - - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - for(int i=0; i < leaves.size(); ++i) { - buffer.append("leaf-"); - buffer.append(i); - buffer.append(" = "); - buffer.append(leaves.get(i).toString()); - buffer.append('\n'); - } - buffer.append("expr = "); - buffer.append(expression); - return buffer.toString(); - } - - public String toKryo() { - Output out = new Output(4 * 1024, 10 * 1024 * 1024); - new Kryo().writeObject(out, this); - out.close(); - return Base64.encodeBase64String(out.toBytes()); - } - - static SearchArgument fromKryo(String value) { - Input input = new Input(Base64.decodeBase64(value)); - return new Kryo().readObject(input, SearchArgumentImpl.class); - } - - private static class BuilderImpl implements Builder { - private final Deque currentTree = - new ArrayDeque(); - private final List leaves = new ArrayList(); - private ExpressionTree root = null; - - @Override - public Builder startOr() { - ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR); - if (currentTree.size() != 0) { - ExpressionTree parent = currentTree.getFirst(); - parent.getChildren().add(node); - } - currentTree.addFirst(node); - return this; - } - - @Override - public Builder startAnd() { - ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND); - if (currentTree.size() != 0) { - ExpressionTree parent = currentTree.getFirst(); - parent.getChildren().add(node); - } - currentTree.addFirst(node); - return this; - } - - @Override - public Builder startNot() { - ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT); - if (currentTree.size() != 0) { - ExpressionTree parent = currentTree.getFirst(); - parent.getChildren().add(node); - } - currentTree.addFirst(node); - return this; - } - - @Override - public Builder end() { - root = currentTree.removeFirst(); - if (root.getChildren().size() == 0) { - throw new IllegalArgumentException("Can't create expression " + root + - " with no children."); - } - if (root.getOperator() == ExpressionTree.Operator.NOT && - root.getChildren().size() != 1) { - throw new IllegalArgumentException("Can't create not expression " + - root + " with more than 1 child."); - } - return this; - } - - private static Object boxLiteral(Object literal) { - if (literal instanceof String || - literal instanceof Long || - literal instanceof Double || - literal instanceof DateWritable || - literal instanceof Timestamp || - literal instanceof HiveDecimal || - literal instanceof BigDecimal || - literal instanceof Boolean) { - return literal; - } else if (literal instanceof HiveChar || - literal instanceof HiveVarchar) { - return StringUtils.stripEnd(literal.toString(), null); - } else if (literal instanceof Byte || - literal instanceof Short || - literal instanceof Integer) { - return ((Number) literal).longValue(); - } else if (literal instanceof Float) { - // to avoid change in precision when upcasting float to double - // we convert the literal to string and parse it as double. (HIVE-8460) - return Double.parseDouble(literal.toString()); - } else { - throw new IllegalArgumentException("Unknown type for literal " + - literal); - } - } - - private static PredicateLeaf.Type getType(Object literal) { - if (literal instanceof Byte || - literal instanceof Short || - literal instanceof Integer) { - return PredicateLeaf.Type.INTEGER; - } else if(literal instanceof Long){ - return PredicateLeaf.Type.LONG; - }else if (literal instanceof HiveChar || - literal instanceof HiveVarchar || - literal instanceof String) { - return PredicateLeaf.Type.STRING; - } else if (literal instanceof Float || - literal instanceof Double) { - return PredicateLeaf.Type.FLOAT; - } else if (literal instanceof DateWritable) { - return PredicateLeaf.Type.DATE; - } else if (literal instanceof Timestamp) { - return PredicateLeaf.Type.TIMESTAMP; - }else if (literal instanceof HiveDecimal || - literal instanceof BigDecimal) { - return PredicateLeaf.Type.DECIMAL; - } else if (literal instanceof Boolean) { - return PredicateLeaf.Type.BOOLEAN; - } - throw new IllegalArgumentException("Unknown type for literal " + literal); - } - - @Override - public Builder lessThan(String column, Object literal) { - ExpressionTree parent = currentTree.getFirst(); - Object box = boxLiteral(literal); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN, - getType(box), column, box, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder lessThanEquals(String column, Object literal) { - ExpressionTree parent = currentTree.getFirst(); - Object box = boxLiteral(literal); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS, - getType(box), column, box, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder equals(String column, Object literal) { - ExpressionTree parent = currentTree.getFirst(); - Object box = boxLiteral(literal); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS, - getType(box), column, box, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder nullSafeEquals(String column, Object literal) { - ExpressionTree parent = currentTree.getFirst(); - Object box = boxLiteral(literal); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - getType(box), column, box, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder in(String column, Object... literal) { - ExpressionTree parent = currentTree.getFirst(); - if (literal.length == 0) { - throw new IllegalArgumentException("Can't create in expression with " - + "no arguments"); - } - List argList = new ArrayList(); - for(Object lit: literal){ - argList.add(boxLiteral(lit)); - } - - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.IN, - getType(argList.get(0)), column, null, argList); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder isNull(String column) { - ExpressionTree parent = currentTree.getFirst(); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL, - PredicateLeaf.Type.STRING, column, null, null); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public Builder between(String column, Object lower, Object upper) { - ExpressionTree parent = currentTree.getFirst(); - List argList = new ArrayList(); - argList.add(boxLiteral(lower)); - argList.add(boxLiteral(upper)); - PredicateLeaf leaf = - new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN, - getType(argList.get(0)), column, null, argList); - leaves.add(leaf); - parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); - return this; - } - - @Override - public SearchArgument build() { - if (currentTree.size() != 0) { - throw new IllegalArgumentException("Failed to end " + - currentTree.size() + " operations."); - } - ExpressionBuilder internal = new ExpressionBuilder(); - ExpressionTree normalized = internal.expression(root, leaves); - return new SearchArgumentImpl(normalized, internal.getLeaves()); - } - } - - public static Builder newBuilder() { - return new BuilderImpl(); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index e96ab2a..e40e1d2 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -66,6 +66,7 @@ import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy; +import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; @@ -1746,8 +1747,8 @@ public void testSetSearchArgument() throws Exception { types.add(builder.build()); types.add(builder.build()); SearchArgument isNull = SearchArgumentFactory.newBuilder() - .startAnd().isNull("cost").end().build(); - conf.set(SearchArgumentFactory.SARG_PUSHDOWN, isNull.toKryo()); + .startAnd().isNull("cost", PredicateLeaf.Type.INTEGER).end().build(); + conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, isNull.toKryo()); conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost"); options.include(new boolean[]{true, true, false, true, false}); @@ -1791,7 +1792,7 @@ public void testSplitElimination() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("z", new Integer(0)) + .lessThan("z", PredicateLeaf.Type.INTEGER, new Integer(0)) .end() .build(); conf.set("sarg.pushdown", sarg.toKryo()); @@ -1833,7 +1834,7 @@ public void testSplitEliminationNullStats() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("z", new String("foo")) + .lessThan("z", PredicateLeaf.Type.STRING, new String("foo")) .end() .build(); conf.set("sarg.pushdown", sarg.toKryo()); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 255565e..4480d22 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.serde2.io.ByteWritable; @@ -1922,9 +1923,9 @@ public void testPredicatePushdown() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() .startNot() - .lessThan("int1", 300000) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 300000) .end() - .lessThan("int1", 600000) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 600000) .end() .build(); RecordReader rows = reader.rowsOptions(new Reader.Options() @@ -1945,7 +1946,7 @@ public void testPredicatePushdown() throws Exception { // look through the file with no rows selected sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("int1", 0) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 0) .end() .build(); rows = reader.rowsOptions(new Reader.Options() @@ -1958,9 +1959,9 @@ public void testPredicatePushdown() throws Exception { // select first 100 and last 100 rows sarg = SearchArgumentFactory.newBuilder() .startOr() - .lessThan("int1", 300 * 100) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 100) .startNot() - .lessThan("int1", 300 * 3400) + .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 3400) .end() .end() .build(); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java index 957f54e..7957cb4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java @@ -42,7 +42,9 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; import org.apache.hadoop.hive.ql.io.sarg.TestSearchArgumentImpl; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.io.DataOutputBuffer; import org.junit.Test; import org.mockito.MockSettings; @@ -351,14 +353,14 @@ public void testPredEvalWithBooleanStats() throws Exception { RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", "true", null); + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null)); assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", "hello", null); + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", false, null); assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null)); assertEquals(TruthValue.YES_NO, @@ -368,7 +370,7 @@ public void testPredEvalWithBooleanStats() throws Exception { @Test public void testPredEvalWithIntStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); @@ -390,7 +392,7 @@ public void testPredEvalWithIntStats() throws Exception { RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); + PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); @@ -403,7 +405,7 @@ public void testPredEvalWithIntStats() throws Exception { @Test public void testPredEvalWithDoubleStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); @@ -425,7 +427,7 @@ public void testPredEvalWithDoubleStats() throws Exception { RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); + PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); @@ -461,10 +463,10 @@ public void testPredEvalWithStringStats() throws Exception { pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 1000), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(100), null); + PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("100"), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); @@ -477,7 +479,7 @@ public void testPredEvalWithStringStats() throws Exception { @Test public void testPredEvalWithDateStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); // Date to Integer conversion is not possible. assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); @@ -530,7 +532,7 @@ public void testPredEvalWithDateStats() throws Exception { // Date to Decimal conversion is also not possible. pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); + PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); @@ -548,7 +550,7 @@ public void testPredEvalWithDateStats() throws Exception { @Test public void testPredEvalWithDecimalStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15, null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); @@ -570,7 +572,7 @@ public void testPredEvalWithDecimalStats() throws Exception { RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); + PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); @@ -588,7 +590,7 @@ public void testPredEvalWithDecimalStats() throws Exception { @Test public void testPredEvalWithTimestampStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15, null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); @@ -618,7 +620,7 @@ public void testPredEvalWithTimestampStats() throws Exception { 100 * 24L * 60L * 60L * 1000L), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); + PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); assertEquals(TruthValue.YES_NO, @@ -636,7 +638,7 @@ public void testPredEvalWithTimestampStats() throws Exception { public void testEquals() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER, - "x", 15L, null); + "x", 15, null); assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null)); assertEquals(TruthValue.YES_NO_NULL, @@ -655,7 +657,7 @@ public void testEquals() throws Exception { public void testNullSafeEquals() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, - "x", 15L, null); + "x", 15, null); assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null)); assertEquals(TruthValue.YES_NO, @@ -674,7 +676,7 @@ public void testNullSafeEquals() throws Exception { public void testLessThan() throws Exception { PredicateLeaf lessThan = TestSearchArgumentImpl.createPredicateLeaf (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.INTEGER, - "x", 15L, null); + "x", 15, null); assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null)); assertEquals(TruthValue.NO_NULL, @@ -691,7 +693,7 @@ public void testLessThan() throws Exception { public void testLessThanEquals() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.INTEGER, - "x", 15L, null); + "x", 15, null); assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null)); assertEquals(TruthValue.YES_NO_NULL, @@ -707,8 +709,8 @@ public void testLessThanEquals() throws Exception { @Test public void testIn() throws Exception { List args = new ArrayList(); - args.add(10L); - args.add(20L); + args.add(10); + args.add(20); PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER, "x", null, args); @@ -725,8 +727,8 @@ public void testIn() throws Exception { @Test public void testBetween() throws Exception { List args = new ArrayList(); - args.add(10L); - args.add(20L); + args.add(10); + args.add(20); PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.INTEGER, "x", null, args); @@ -1274,7 +1276,7 @@ public void testPartialPlanString() throws Exception { @Test public void testIntNullSafeEqualsBloomFilter() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); BloomFilterIO bf = new BloomFilterIO(10000); for (int i = 20; i < 1000; i++) { bf.addLong(i); @@ -1289,7 +1291,7 @@ public void testIntNullSafeEqualsBloomFilter() throws Exception { @Test public void testIntEqualsBloomFilter() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); BloomFilterIO bf = new BloomFilterIO(10000); for (int i = 20; i < 1000; i++) { bf.addLong(i); @@ -1539,7 +1541,7 @@ public void testTimestampInBloomFilter() throws Exception { public void testDecimalNullSafeEqualsBloomFilter() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", - HiveDecimal.create(15), + new HiveDecimalWritable("15"), null); BloomFilterIO bf = new BloomFilterIO(10000); for (int i = 20; i < 1000; i++) { @@ -1555,7 +1557,8 @@ public void testDecimalNullSafeEqualsBloomFilter() throws Exception { @Test public void testDecimalEqualsBloomFilter() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DECIMAL, "x", + new HiveDecimalWritable("15"), null); BloomFilterIO bf = new BloomFilterIO(10000); for (int i = 20; i < 1000; i++) { @@ -1571,8 +1574,8 @@ public void testDecimalEqualsBloomFilter() throws Exception { @Test public void testDecimalInBloomFilter() throws Exception { List args = new ArrayList(); - args.add(HiveDecimal.create(15)); - args.add(HiveDecimal.create(19)); + args.add(new HiveDecimalWritable("15")); + args.add(new HiveDecimalWritable("19")); PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL, "x", null, args); @@ -1593,9 +1596,9 @@ public void testDecimalInBloomFilter() throws Exception { @Test public void testNullsInBloomFilter() throws Exception { List args = new ArrayList(); - args.add(HiveDecimal.create(15)); + args.add(new HiveDecimalWritable("15")); args.add(null); - args.add(HiveDecimal.create(19)); + args.add(new HiveDecimalWritable("19")); PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL, "x", null, args); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java new file mode 100644 index 0000000..87dd344 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.parquet; + +import static junit.framework.Assert.assertEquals; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.junit.Test; + +import java.sql.Date; + +import org.apache.parquet.filter2.predicate.FilterPredicate; + +/** + * These tests test the conversion to Parquet's sarg implementation. + */ +public class TestParquetRecordReaderWrapper { + + private static TruthValue[] values(TruthValue... vals) { + return vals; + } + + @Test + public void testBuilder() throws Exception { + SearchArgument sarg = SearchArgumentFactory.newBuilder() + .startNot() + .startOr() + .isNull("x", PredicateLeaf.Type.INTEGER) + .between("y", PredicateLeaf.Type.INTEGER, 10, 20) + .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) + .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger") + .end() + .end() + .build(); + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = + "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " + + "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); + } + + @Test + public void testBuilderComplexTypes() throws Exception { + SearchArgument sarg = + SearchArgumentFactory.newBuilder() + .startAnd() + .lessThan("x", PredicateLeaf.Type.DATE, + Date.valueOf("1970-1-11")) + .lessThanEquals("y", PredicateLeaf.Type.STRING, + new HiveChar("hi", 10).toString()) + .equals("z", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("1.0")) + .end() + .build(); + assertEquals("lteq(y, Binary{\"hi \"})", + ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString()); + + sarg = SearchArgumentFactory.newBuilder() + .startNot() + .startOr() + .isNull("x", PredicateLeaf.Type.INTEGER) + .between("y", PredicateLeaf.Type.DECIMAL, + new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0")) + .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) + .nullSafeEquals("a", PredicateLeaf.Type.STRING, + new HiveVarchar("stinger", 100).toString()) + .end() + .end() + .build(); + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = + "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + + "not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); + } + + @Test + public void testBuilderComplexTypes2() throws Exception { + SearchArgument sarg = + SearchArgumentFactory.newBuilder() + .startAnd() + .lessThan("x", PredicateLeaf.Type.DATE, Date.valueOf("2005-3-12")) + .lessThanEquals("y", PredicateLeaf.Type.STRING, + new HiveChar("hi", 10).toString()) + .equals("z", PredicateLeaf.Type.DECIMAL, + new HiveDecimalWritable("1.0")) + .end() + .build(); + assertEquals("lteq(y, Binary{\"hi \"})", + ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString()); + + sarg = SearchArgumentFactory.newBuilder() + .startNot() + .startOr() + .isNull("x", PredicateLeaf.Type.INTEGER) + .between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"), + new HiveDecimalWritable("20.0")) + .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) + .nullSafeEquals("a", PredicateLeaf.Type.STRING, + new HiveVarchar("stinger", 100).toString()) + .end() + .end() + .build(); + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + + "not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); + } + + @Test + public void testBuilderFloat() throws Exception { + SearchArgument sarg = + SearchArgumentFactory.newBuilder() + .startAnd() + .lessThan("x", PredicateLeaf.Type.INTEGER, new Integer((short) 22)) + .lessThan("x1", PredicateLeaf.Type.INTEGER, new Integer(22)) + .lessThanEquals("y", PredicateLeaf.Type.STRING, + new HiveChar("hi", 10).toString()) + .equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22)) + .equals("z1", PredicateLeaf.Type.FLOAT, new Double(0.22)) + .end() + .build(); + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = "and(and(and(and(lt(x, 22), lt(x1, 22))," + + " lteq(y, Binary{\"hi \"})), eq(z, " + + "0.22)), eq(z1, 0.22))"; + assertEquals(expected, p.toString()); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java new file mode 100644 index 0000000..85e952f --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java @@ -0,0 +1,2863 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.sarg; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertNull; +import static junit.framework.Assert.assertTrue; + +import com.google.common.collect.Sets; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.PredicateLeafImpl; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.junit.Test; + +import java.beans.XMLDecoder; +import java.io.ByteArrayInputStream; +import java.io.UnsupportedEncodingException; +import java.lang.reflect.Field; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; +import java.util.Set; + +import org.apache.parquet.filter2.predicate.FilterPredicate; + +/** + * These tests cover the conversion from Hive's AST to SearchArguments. + */ +public class TestConvertAstToSearchArg { + + private static void assertNoSharedNodes(ExpressionTree tree, + Set seen + ) throws Exception { + if (seen.contains(tree) && + tree.getOperator() != ExpressionTree.Operator.LEAF) { + assertTrue("repeated node in expression " + tree, false); + } + seen.add(tree); + if (tree.getChildren() != null) { + for (ExpressionTree child : tree.getChildren()) { + assertNoSharedNodes(child, seen); + } + } + } + + private ExprNodeGenericFuncDesc getFuncDesc(String xmlSerialized) { + byte[] bytes; + try { + bytes = xmlSerialized.getBytes("UTF-8"); + } catch (UnsupportedEncodingException ex) { + throw new RuntimeException("UTF-8 support required", ex); + } + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + XMLDecoder decoder = new XMLDecoder(bais, null, null); + + try { + return (ExprNodeGenericFuncDesc) decoder.readObject(); + } finally { + decoder.close(); + } + } + + @Test + public void testExpression1() throws Exception { + // first_name = 'john' or + // 'greg' < first_name or + // 'alan' > first_name or + // id > 12 or + // 13 < id or + // id < 15 or + // 16 > id or + // (id <=> 30 and first_name <=> 'owen') + String exprStr = " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " string \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " john \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " greg \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " alan \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 12 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 13 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 15 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 16 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 30 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " owen \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n"; + + SearchArgumentImpl sarg = + (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + List leaves = sarg.getLeaves(); + assertEquals(9, leaves.size()); + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String[] conditions = new String[]{ + "eq(first_name, Binary{\"john\"})", /* first_name = 'john' */ + "not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */ + "lt(first_name, Binary{\"alan\"})", /* 'alan' > first_name */ + "not(lteq(id, 12))", /* id > 12 or */ + "not(lteq(id, 13))", /* 13 < id or */ + "lt(id, 15)", /* id < 15 or */ + "lt(id, 16)", /* 16 > id or */ + "eq(id, 30)", /* id <=> 30 */ + "eq(first_name, Binary{\"owen\"})" /* first_name <=> 'owen' */ + }; + String expected = String + .format("and(or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %8$s), " + + "or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %9$s))", conditions); + assertEquals(expected, p.toString()); + + PredicateLeaf leaf = leaves.get(0); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); + assertEquals("first_name", leaf.getColumnName()); + assertEquals("john", leaf.getLiteral()); + + leaf = leaves.get(1); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); + assertEquals("first_name", leaf.getColumnName()); + assertEquals("greg", leaf.getLiteral()); + + leaf = leaves.get(2); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("first_name", leaf.getColumnName()); + assertEquals("alan", leaf.getLiteral()); + + leaf = leaves.get(3); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(12, leaf.getLiteral()); + + leaf = leaves.get(4); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(13, leaf.getLiteral()); + + leaf = leaves.get(5); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(15, leaf.getLiteral()); + + leaf = leaves.get(6); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(16, leaf.getLiteral()); + + leaf = leaves.get(7); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(30, leaf.getLiteral()); + + leaf = leaves.get(8); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); + assertEquals("first_name", leaf.getColumnName()); + assertEquals("owen", leaf.getLiteral()); + + assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + + " (not leaf-4) leaf-5 leaf-6 leaf-7)" + + " (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + + " (not leaf-4) leaf-5 leaf-6 leaf-8))", + sarg.getExpression().toString()); + assertNoSharedNodes(sarg.getExpression(), + Sets.newIdentityHashSet()); + } + + @Test + public void testExpression2() throws Exception { + /* first_name is null or + first_name <> 'sue' or + id >= 12 or + id <= 4; */ + String exprStr = " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " string \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " sue \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 12 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 4 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n"; + + SearchArgumentImpl sarg = + (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + List leaves = sarg.getLeaves(); + assertEquals(4, leaves.size()); + + String[] conditions = new String[]{ + "eq(first_name, null)", /* first_name is null */ + "not(eq(first_name, Binary{\"sue\"}))", /* first_name <> 'sue' */ + "not(lt(id, 12))", /* id >= 12 */ + "lteq(id, 4)" /* id <= 4 */ + }; + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions); + assertEquals(expected, p.toString()); + + PredicateLeaf leaf = leaves.get(0); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator()); + assertEquals("first_name", leaf.getColumnName()); + assertEquals(null, leaf.getLiteral()); + assertEquals(null, leaf.getLiteralList()); + + leaf = leaves.get(1); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); + assertEquals("first_name", leaf.getColumnName()); + assertEquals("sue", leaf.getLiteral()); + + leaf = leaves.get(2); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(12, leaf.getLiteral()); + + leaf = leaves.get(3); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(4, leaf.getLiteral()); + + assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)", + sarg.getExpression().toString()); + assertNoSharedNodes(sarg.getExpression(), + Sets.newIdentityHashSet()); + assertEquals(TruthValue.NO, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES, + TruthValue.NO))); + assertEquals(TruthValue.YES, + sarg.evaluate(values(TruthValue.YES, TruthValue.YES, TruthValue.YES, + TruthValue.NO))); + assertEquals(TruthValue.YES, + sarg.evaluate(values(TruthValue.NO, TruthValue.NO, TruthValue.YES, + TruthValue.NO))); + assertEquals(TruthValue.YES, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NO, + TruthValue.NO))); + assertEquals(TruthValue.YES, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES, + TruthValue.YES))); + assertEquals(TruthValue.NULL, + sarg.evaluate(values(TruthValue.NULL, TruthValue.YES, TruthValue.YES, + TruthValue.NO))); + assertEquals(TruthValue.NULL, + sarg.evaluate(values(TruthValue.NO, TruthValue.NULL, TruthValue.YES, + TruthValue.NO))); + assertEquals(TruthValue.NULL, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NULL, + TruthValue.NO))); + assertEquals(TruthValue.NULL, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES, + TruthValue.NULL))); + assertEquals(TruthValue.YES_NO, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES_NO, TruthValue.YES, + TruthValue.YES_NO))); + assertEquals(TruthValue.NO_NULL, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES_NULL, TruthValue.YES, + TruthValue.NO_NULL))); + assertEquals(TruthValue.YES_NULL, + sarg.evaluate(values(TruthValue.YES_NULL, TruthValue.YES_NO_NULL, + TruthValue.YES, TruthValue.NULL))); + assertEquals(TruthValue.YES_NO_NULL, + sarg.evaluate(values(TruthValue.NO_NULL, TruthValue.YES_NO_NULL, + TruthValue.YES, TruthValue.NO))); + } + + @Test + public void testExpression3() throws Exception { + /* (id between 23 and 45) and + first_name = 'alan' and + substr('xxxxx', 3) == first_name and + 'smith' = last_name and + substr(first_name, 3) == 'yyy' */ + String exprStr = " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " false \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 23 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 45 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " string \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " alan \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " xxxxx \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 3 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " org.apache.hadoop.hive.ql.udf.UDFSubstr \n" + + " \n" + + " \n" + + " substr \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " smith \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " last_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 3 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " org.apache.hadoop.hive.ql.udf.UDFSubstr \n" + + " \n" + + " \n" + + " substr \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " yyy \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n"; + + SearchArgumentImpl sarg = + (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + List leaves = sarg.getLeaves(); + assertEquals(3, leaves.size()); + + String[] conditions = new String[]{ + "lt(id, 45)", /* id between 23 and 45 */ + "not(lteq(id, 23))", /* id between 23 and 45 */ + "eq(first_name, Binary{\"alan\"})", /* first_name = 'alan' */ + "eq(last_name, Binary{\"smith\"})" /* 'smith' = last_name */ + }; + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions); + assertEquals(expected, p.toString()); + + PredicateLeaf leaf = leaves.get(0); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(null, leaf.getLiteral()); + assertEquals(23, leaf.getLiteralList().get(0)); + assertEquals(45, leaf.getLiteralList().get(1)); + + leaf = leaves.get(1); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); + assertEquals("first_name", leaf.getColumnName()); + assertEquals("alan", leaf.getLiteral()); + + leaf = leaves.get(2); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); + assertEquals("last_name", leaf.getColumnName()); + assertEquals("smith", leaf.getLiteral()); + + assertEquals("(and leaf-0 leaf-1 leaf-2)", + sarg.getExpression().toString()); + assertNoSharedNodes(sarg.getExpression(), + Sets.newIdentityHashSet()); + } + + @Test + public void testExpression4() throws Exception { + /* id <> 12 and + first_name in ('john', 'sue') and + id in (34,50) */ + String exprStr = " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 12 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " string \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " john \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " sue \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 34 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 50 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n"; + + SearchArgumentImpl sarg = + (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + List leaves = sarg.getLeaves(); + assertEquals(3, leaves.size()); + + String[] conditions = new String[]{ + "not(eq(id, 12))", /* id <> 12 */ + "or(eq(first_name, Binary{\"john\"}), eq(first_name, Binary{\"sue\"}))", /* first_name in + ('john', 'sue') */ + "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */ + }; + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions); + assertEquals(expected, p.toString()); + + PredicateLeaf leaf = leaves.get(0); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(12, leaf.getLiteral()); + + leaf = leaves.get(1); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); + assertEquals("first_name", leaf.getColumnName()); + assertEquals("john", leaf.getLiteralList().get(0)); + assertEquals("sue", leaf.getLiteralList().get(1)); + + leaf = leaves.get(2); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(34, leaf.getLiteralList().get(0)); + assertEquals(50, leaf.getLiteralList().get(1)); + + assertEquals("(and (not leaf-0) leaf-1 leaf-2)", + sarg.getExpression().toString()); + assertNoSharedNodes(sarg.getExpression(), + Sets.newIdentityHashSet()); + assertEquals(TruthValue.YES, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES))); + assertEquals(TruthValue.NULL, + sarg.evaluate(values(TruthValue.NULL, TruthValue.YES, TruthValue.YES))); + assertEquals(TruthValue.NULL, + sarg.evaluate(values(TruthValue.NO, TruthValue.NULL, TruthValue.YES))); + assertEquals(TruthValue.NO, + sarg.evaluate(values(TruthValue.YES, TruthValue.YES, TruthValue.YES))); + assertEquals(TruthValue.NO, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NO))); + assertEquals(TruthValue.NO, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES_NULL, TruthValue.NO))); + assertEquals(TruthValue.NO_NULL, + sarg.evaluate(values(TruthValue.NO, TruthValue.NULL, TruthValue.YES_NO_NULL))); + assertEquals(TruthValue.NO_NULL, + sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NO_NULL))); + } + + @Test + public void testExpression5() throws Exception { + /* (first_name < 'owen' or 'foobar' = substr(last_name, 4)) and + first_name between 'david' and 'greg' */ + String exprStr = " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " string \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " owen \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " foobar \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " last_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 4 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " org.apache.hadoop.hive.ql.udf.UDFSubstr \n" + + " \n" + + " \n" + + " substr \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " false \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " david \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " greg \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n"; + + SearchArgumentImpl sarg = + (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + List leaves = sarg.getLeaves(); + assertEquals(1, leaves.size()); + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = + "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))"; + assertEquals(p.toString(), expected); + + assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getType()); + assertEquals(PredicateLeaf.Operator.BETWEEN, + leaves.get(0).getOperator()); + assertEquals("first_name", leaves.get(0).getColumnName()); + + assertEquals("leaf-0", + sarg.getExpression().toString()); + assertNoSharedNodes(sarg.getExpression(), + Sets.newIdentityHashSet()); + } + + @Test + public void testExpression7() throws Exception { + /* (id < 10 and id < 11 and id < 12) or (id < 13 and id < 14 and id < 15) or + (id < 16 and id < 17) or id < 18 */ + String exprStr = " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 10 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 11 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 12 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 13 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 14 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 15 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 16 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 17 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 18 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + ""; + + SearchArgumentImpl sarg = + (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + List leaves = sarg.getLeaves(); + assertEquals(9, leaves.size()); + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 17)))"; + assertEquals(p.toString(), expected); + + PredicateLeaf leaf = leaves.get(0); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(18, leaf.getLiteral()); + + leaf = leaves.get(1); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(10, leaf.getLiteral()); + + leaf = leaves.get(2); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(13, leaf.getLiteral()); + + leaf = leaves.get(3); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(16, leaf.getLiteral()); + + leaf = leaves.get(4); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(11, leaf.getLiteral()); + + leaf = leaves.get(5); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(12, leaf.getLiteral()); + + leaf = leaves.get(6); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(14, leaf.getLiteral()); + + leaf = leaves.get(7); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(15, leaf.getLiteral()); + + leaf = leaves.get(8); + assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); + assertEquals("id", leaf.getColumnName()); + assertEquals(17, leaf.getLiteral()); + + assertEquals("(and" + + " (or leaf-0 leaf-1 leaf-2 leaf-3)" + + " (or leaf-0 leaf-4 leaf-2 leaf-3)" + + " (or leaf-0 leaf-5 leaf-2 leaf-3)" + + " (or leaf-0 leaf-1 leaf-6 leaf-3)" + + " (or leaf-0 leaf-4 leaf-6 leaf-3)" + + " (or leaf-0 leaf-5 leaf-6 leaf-3)" + + " (or leaf-0 leaf-1 leaf-7 leaf-3)" + + " (or leaf-0 leaf-4 leaf-7 leaf-3)" + + " (or leaf-0 leaf-5 leaf-7 leaf-3)" + + " (or leaf-0 leaf-1 leaf-2 leaf-8)" + + " (or leaf-0 leaf-4 leaf-2 leaf-8)" + + " (or leaf-0 leaf-5 leaf-2 leaf-8)" + + " (or leaf-0 leaf-1 leaf-6 leaf-8)" + + " (or leaf-0 leaf-4 leaf-6 leaf-8)" + + " (or leaf-0 leaf-5 leaf-6 leaf-8)" + + " (or leaf-0 leaf-1 leaf-7 leaf-8)" + + " (or leaf-0 leaf-4 leaf-7 leaf-8)" + + " (or leaf-0 leaf-5 leaf-7 leaf-8))", + sarg.getExpression().toString()); + } + + @Test + public void testExpression8() throws Exception { + /* first_name = last_name */ + String exprStr = " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " first_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " string \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " last_name \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " "; + + SearchArgumentImpl sarg = + (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + List leaves = sarg.getLeaves(); + assertEquals(0, leaves.size()); + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + assertNull(p); + + assertEquals("YES_NO_NULL", + sarg.getExpression().toString()); + } + + @Test + public void testExpression9() throws Exception { + /* first_name = last_name */ + String exprStr = " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 1 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 3 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " true \n" + + " \n" + + " \n" + + " org.apache.hadoop.hive.ql.udf.UDFOPPlus \n" + + " \n" + + " \n" + + " + \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 4 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " true \n" + + " \n" + + " \n" + + " org.apache.hadoop.hive.ql.udf.UDFOPPlus \n" + + " \n" + + " \n" + + " + \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " "; + + SearchArgumentImpl sarg = + (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + List leaves = sarg.getLeaves(); + assertEquals(0, leaves.size()); + + assertEquals("YES_NO_NULL", + sarg.getExpression().toString()); + assertEquals(TruthValue.YES_NO_NULL, sarg.evaluate(values())); + } + + @Test + public void testExpression10() throws Exception { + /* id >= 10 and not (10 > id) */ + String exprStr = " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 10 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 10 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + ""; + + SearchArgumentImpl sarg = + (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + List leaves = sarg.getLeaves(); + assertEquals(1, leaves.size()); + + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); + String expected = "and(not(lt(id, 10)), not(lt(id, 10)))"; + assertEquals(expected, p.toString()); + + assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType()); + assertEquals(PredicateLeaf.Operator.LESS_THAN, + leaves.get(0).getOperator()); + assertEquals("id", leaves.get(0).getColumnName()); + assertEquals(10, leaves.get(0).getLiteral()); + + assertEquals("(and (not leaf-0) (not leaf-0))", + sarg.getExpression().toString()); + assertNoSharedNodes(sarg.getExpression(), + Sets.newIdentityHashSet()); + assertEquals(TruthValue.NO, sarg.evaluate(values(TruthValue.YES))); + assertEquals(TruthValue.YES, sarg.evaluate(values(TruthValue.NO))); + assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NULL))); + assertEquals(TruthValue.NO_NULL, sarg.evaluate(values(TruthValue.YES_NULL))); + assertEquals(TruthValue.YES_NULL, sarg.evaluate(values(TruthValue.NO_NULL))); + assertEquals(TruthValue.YES_NO, sarg.evaluate(values(TruthValue.YES_NO))); + assertEquals(TruthValue.YES_NO_NULL, sarg.evaluate(values(TruthValue.YES_NO_NULL))); + } + + private static TruthValue[] values(TruthValue... vals) { + return vals; + } + + // The following tests use serialized ASTs that I generated using Hive from + // branch-0.14. + + @Test + public void TestTimestampSarg() throws Exception { + String serialAst = + "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLn" + + "BsYW4uRXhwck5vZGVDb2x1bW5EZXPjAQF08wAAAWJpZ29y4wECb3JnLmFwYWNoZS5o" + + "YWRvb3AuaGl2ZS5zZXJkZTIudHlwZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAXRpbW" + + "VzdGFt8AEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5xbC5wbGFuLkV4cHJOb2RlQ29u" + + "c3RhbnREZXPjAQECAQFzdHJpbucDATIwMTUtMDMtMTcgMTI6MzQ6NbYBBG9yZy5hcG" + + "FjaGUuaGFkb29wLmhpdmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QRXF1YewB" + + "AAABgj0BRVFVQcwBBW9yZy5hcGFjaGUuaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5Q" + + "EAAAECAQFib29sZWHu"; + SearchArgument sarg = + new ConvertAstToSearchArg(Utilities.deserializeExpression(serialAst)) + .buildSearchArgument(); + assertEquals("leaf-0", sarg.getExpression().toString()); + assertEquals(1, sarg.getLeaves().size()); + PredicateLeaf leaf = sarg.getLeaves().get(0); + assertEquals(PredicateLeaf.Type.TIMESTAMP, leaf.getType()); + assertEquals("(EQUALS ts 2015-03-17 12:34:56.0)", leaf.toString()); + } + + @Test + public void TestDateSarg() throws Exception { + String serialAst = + "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + + "wck5vZGVDb2x1bW5EZXPjAQFk9AAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT" + + "IudHlwZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAWRhdOUBA29yZy5hcGFjaGUuaGFkb29wLmhpdmUuc" + + "WwucGxhbi5FeHByTm9kZUNvbnN0YW50RGVz4wEBAgEBc3RyaW7nAwEyMDE1LTA1LTC1AQRvcmcuYXBh" + + "Y2hlLmhhZG9vcC5oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUH" + + "MAQVvcmcuYXBhY2hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAgEBYm9vbGVh7g=="; + SearchArgument sarg = + new ConvertAstToSearchArg(Utilities.deserializeExpression(serialAst)) + .buildSearchArgument(); + assertEquals("leaf-0", sarg.getExpression().toString()); + assertEquals(1, sarg.getLeaves().size()); + PredicateLeaf leaf = sarg.getLeaves().get(0); + assertEquals(PredicateLeaf.Type.DATE, leaf.getType()); + assertEquals("(EQUALS dt 2015-05-05)", leaf.toString()); + } + + @Test + public void TestDecimalSarg() throws Exception { + String serialAst = + "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + + "wck5vZGVDb2x1bW5EZXPjAQFkZeMAAAFiaWdvcuMBAm9yZy5hcGFjaGUuaGFkb29wLmhpdmUuc2VyZG" + + "UyLnR5cGVpbmZvLkRlY2ltYWxUeXBlSW5m7wEUAAFkZWNpbWHsAQNvcmcuYXBhY2hlLmhhZG9vcC5oa" + + "XZlLnFsLnBsYW4uRXhwck5vZGVDb25zdGFudERlc+MBAQRvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnNl" + + "cmRlMi50eXBlaW5mby5QcmltaXRpdmVUeXBlSW5m7wEBaW70AvYBAQVvcmcuYXBhY2hlLmhhZG9vcC5" + + "oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQZvcmcuYXBhY2" + + "hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABBAEBYm9vbGVh7g=="; + SearchArgument sarg = + new ConvertAstToSearchArg(Utilities.deserializeExpression(serialAst)) + .buildSearchArgument(); + assertEquals("leaf-0", sarg.getExpression().toString()); + assertEquals(1, sarg.getLeaves().size()); + PredicateLeaf leaf = sarg.getLeaves().get(0); + assertEquals(PredicateLeaf.Type.DECIMAL, leaf.getType()); + assertEquals("(EQUALS dec 123)", leaf.toString()); + } + + @Test + public void TestCharSarg() throws Exception { + String serialAst = + "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + + "wck5vZGVDb2x1bW5EZXPjAQFj6AAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT" + + "IudHlwZWluZm8uQ2hhclR5cGVJbmbvARQBY2hh8gEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5xbC5wb" + + "GFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQEEb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZTIudHlw" + + "ZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAXN0cmlu5wMBY2hhciAgICAgoAEFb3JnLmFwYWNoZS5oYWR" + + "vb3AuaGl2ZS5xbC51ZGYuZ2VuZXJpYy5HZW5lcmljVURGT1BFcXVh7AEAAAGCPQFFUVVBzAEGb3JnLm" + + "FwYWNoZS5oYWRvb3AuaW8uQm9vbGVhbldyaXRhYmzlAQAAAQQBAWJvb2xlYe4="; + SearchArgument sarg = + new ConvertAstToSearchArg(Utilities.deserializeExpression(serialAst)) + .buildSearchArgument(); + assertEquals("leaf-0", sarg.getExpression().toString()); + assertEquals(1, sarg.getLeaves().size()); + PredicateLeaf leaf = sarg.getLeaves().get(0); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals("(EQUALS ch char )", leaf.toString()); + } + + @Test + public void TestVarcharSarg() throws Exception { + String serialAst = + "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + + "wck5vZGVDb2x1bW5EZXPjAQF24wAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT" + + "IudHlwZWluZm8uVmFyY2hhclR5cGVJbmbvAcgBAXZhcmNoYfIBA29yZy5hcGFjaGUuaGFkb29wLmhpd" + + "mUucWwucGxhbi5FeHByTm9kZUNvbnN0YW50RGVz4wEBBG9yZy5hcGFjaGUuaGFkb29wLmhpdmUuc2Vy" + + "ZGUyLnR5cGVpbmZvLlByaW1pdGl2ZVR5cGVJbmbvAQFzdHJpbucDAXZhcmlhYmzlAQVvcmcuYXBhY2h" + + "lLmhhZG9vcC5oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQ" + + "ZvcmcuYXBhY2hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABBAEBYm9vbGVh7g=="; + SearchArgument sarg = + new ConvertAstToSearchArg(Utilities.deserializeExpression(serialAst)) + .buildSearchArgument(); + assertEquals("leaf-0", sarg.getExpression().toString()); + assertEquals(1, sarg.getLeaves().size()); + PredicateLeaf leaf = sarg.getLeaves().get(0); + assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); + assertEquals("(EQUALS vc variable)", leaf.toString()); + } + + @Test + public void TestBigintSarg() throws Exception { + String serialAst = + "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + + "wck5vZGVDb2x1bW5EZXPjAQFi6QAAAWJpZ29y4wECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5zZXJkZT" + + "IudHlwZWluZm8uUHJpbWl0aXZlVHlwZUluZu8BAWJpZ2lu9AEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2Z" + + "S5xbC5wbGFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQECBwnywAEBBG9yZy5hcGFjaGUuaGFkb29wLmhp" + + "dmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QRXF1YewBAAABgj0BRVFVQcwBBW9yZy5hcGFjaGU" + + "uaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5QEAAAECAQFib29sZWHu"; + SearchArgument sarg = + new ConvertAstToSearchArg(Utilities.deserializeExpression(serialAst)) + .buildSearchArgument(); + assertEquals("leaf-0", sarg.getExpression().toString()); + assertEquals(1, sarg.getLeaves().size()); + PredicateLeaf leaf = sarg.getLeaves().get(0); + assertEquals(PredicateLeaf.Type.LONG, leaf.getType()); + assertEquals("(EQUALS bi 12345)", leaf.toString()); + } + + @Test + public void TestBooleanSarg() throws Exception { + String serialAst = + "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + + "wck5vZGVHZW5lcmljRnVuY0Rlc+MBAQABAgECb3JnLmFwYWNoZS5oYWRvb3AuaGl2ZS5xbC5wbGFuLk" + + "V4cHJOb2RlQ29sdW1uRGVz4wEBYrEAAAFib29sb3LjAQNvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnNlc" + + "mRlMi50eXBlaW5mby5QcmltaXRpdmVUeXBlSW5m7wEBYm9vbGVh7gEEb3JnLmFwYWNoZS5oYWRvb3Au" + + "aGl2ZS5xbC5wbGFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQEDCQUBAQVvcmcuYXBhY2hlLmhhZG9vcC5" + + "oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQZvcmcuYXBhY2" + + "hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAwkBAgEBYrIAAAgBAwkBB29yZy5hcGFjaGUua" + + "GFkb29wLmhpdmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QQW7kAQEGAQAAAQMJ"; + SearchArgument sarg = + new ConvertAstToSearchArg(Utilities.deserializeExpression(serialAst)) + .buildSearchArgument(); + assertEquals("(and leaf-0 leaf-1)", sarg.getExpression().toString()); + assertEquals(2, sarg.getLeaves().size()); + PredicateLeaf leaf = sarg.getLeaves().get(0); + assertEquals(PredicateLeaf.Type.BOOLEAN, leaf.getType()); + assertEquals("(EQUALS b1 true)", leaf.toString()); + leaf = sarg.getLeaves().get(1); + assertEquals(PredicateLeaf.Type.BOOLEAN, leaf.getType()); + assertEquals("(EQUALS b2 true)", leaf.toString()); + } + + @Test + public void TestFloatSarg() throws Exception { + String serialAst = + "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + + "wck5vZGVDb2x1bW5EZXPjAQFmbPQAAAFiaWdvcuMBAm9yZy5hcGFjaGUuaGFkb29wLmhpdmUuc2VyZG" + + "UyLnR5cGVpbmZvLlByaW1pdGl2ZVR5cGVJbmbvAQFmbG9h9AEDb3JnLmFwYWNoZS5oYWRvb3AuaGl2Z" + + "S5xbC5wbGFuLkV4cHJOb2RlQ29uc3RhbnREZXPjAQECBwQ/jMzNAQRvcmcuYXBhY2hlLmhhZG9vcC5o" + + "aXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQVvcmcuYXBhY2h" + + "lLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAgEBYm9vbGVh7g=="; + SearchArgument sarg = + new ConvertAstToSearchArg(Utilities.deserializeExpression(serialAst)) + .buildSearchArgument(); + assertEquals("leaf-0", sarg.getExpression().toString()); + assertEquals(1, sarg.getLeaves().size()); + PredicateLeaf leaf = sarg.getLeaves().get(0); + assertEquals(PredicateLeaf.Type.FLOAT, leaf.getType()); + assertEquals("(EQUALS flt 1.1)", leaf.toString()); + } + + @Test + public void TestDoubleSarg() throws Exception { + String serialAst = + "AQEAamF2YS51dGlsLkFycmF5TGlz9AECAQFvcmcuYXBhY2hlLmhhZG9vcC5oaXZlLnFsLnBsYW4uRXh" + + "wck5vZGVDb2x1bW5EZXPjAQFkYuwAAAFiaWdvcuMBAm9yZy5hcGFjaGUuaGFkb29wLmhpdmUuc2VyZG" + + "UyLnR5cGVpbmZvLlByaW1pdGl2ZVR5cGVJbmbvAQFkb3VibOUBA29yZy5hcGFjaGUuaGFkb29wLmhpd" + + "mUucWwucGxhbi5FeHByTm9kZUNvbnN0YW50RGVz4wEBAgcKQAGZmZmZmZoBBG9yZy5hcGFjaGUuaGFk" + + "b29wLmhpdmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QRXF1YewBAAABgj0BRVFVQcwBBW9yZy5" + + "hcGFjaGUuaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5QEAAAECAQFib29sZWHu"; + SearchArgument sarg = + new ConvertAstToSearchArg(Utilities.deserializeExpression(serialAst)) + .buildSearchArgument(); + assertEquals("leaf-0", sarg.getExpression().toString()); + assertEquals(1, sarg.getLeaves().size()); + PredicateLeaf leaf = sarg.getLeaves().get(0); + assertEquals(PredicateLeaf.Type.FLOAT, leaf.getType()); + assertEquals("(EQUALS dbl 2.2)", leaf.toString()); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java index 46ce49c..433474b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java @@ -19,27 +19,23 @@ package org.apache.hadoop.hive.ql.io.sarg; import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertNull; import static junit.framework.Assert.assertTrue; import com.google.common.collect.Sets; import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionBuilder; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.PredicateLeafImpl; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.junit.Test; import java.beans.XMLDecoder; import java.io.ByteArrayInputStream; import java.io.UnsupportedEncodingException; import java.lang.reflect.Field; -import java.math.BigDecimal; +import java.sql.Date; import java.sql.Timestamp; import java.util.List; import java.util.Set; @@ -92,96 +88,103 @@ public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator, @Test public void testNotPushdown() throws Exception { - assertEquals("leaf-1", ExpressionBuilder.pushDownNot(leaf(1)).toString()); + assertEquals("leaf-1", SearchArgumentImpl.BuilderImpl.pushDownNot(leaf(1)) + .toString()); assertEquals("(not leaf-1)", - ExpressionBuilder.pushDownNot(not(leaf(1))).toString()); + SearchArgumentImpl.BuilderImpl.pushDownNot(not(leaf(1))).toString()); assertEquals("leaf-1", - ExpressionBuilder.pushDownNot(not(not(leaf(1)))).toString()); + SearchArgumentImpl.BuilderImpl.pushDownNot(not(not(leaf(1)))) + .toString()); assertEquals("(not leaf-1)", - ExpressionBuilder.pushDownNot(not(not(not(leaf(1))))).toString()); + SearchArgumentImpl.BuilderImpl.pushDownNot(not(not(not(leaf(1))))). + toString()); assertEquals("(or leaf-1 (not leaf-2))", - ExpressionBuilder.pushDownNot(not(and(not(leaf(1)), + SearchArgumentImpl.BuilderImpl.pushDownNot(not(and(not(leaf(1)), leaf(2)))).toString()); assertEquals("(and (not leaf-1) leaf-2)", - ExpressionBuilder.pushDownNot(not(or(leaf(1), + SearchArgumentImpl.BuilderImpl.pushDownNot(not(or(leaf(1), not(leaf(2))))).toString()); assertEquals("(or (or (not leaf-1) leaf-2) leaf-3)", - ExpressionBuilder.pushDownNot(or(not(and(leaf(1), not(leaf(2)))), + SearchArgumentImpl.BuilderImpl.pushDownNot(or(not(and(leaf(1), + not(leaf(2)))), not(not(leaf(3))))).toString()); - assertEquals("NO", ExpressionBuilder.pushDownNot( + assertEquals("NO", SearchArgumentImpl.BuilderImpl.pushDownNot( not(constant(TruthValue.YES))).toString()); - assertEquals("YES", ExpressionBuilder.pushDownNot( + assertEquals("YES", SearchArgumentImpl.BuilderImpl.pushDownNot( not(constant(TruthValue.NO))).toString()); - assertEquals("NULL", ExpressionBuilder.pushDownNot( + assertEquals("NULL", SearchArgumentImpl.BuilderImpl.pushDownNot( not(constant(TruthValue.NULL))).toString()); - assertEquals("YES_NO", ExpressionBuilder.pushDownNot( + assertEquals("YES_NO", SearchArgumentImpl.BuilderImpl.pushDownNot( not(constant(TruthValue.YES_NO))).toString()); - assertEquals("YES_NULL", ExpressionBuilder.pushDownNot( + assertEquals("YES_NULL", SearchArgumentImpl.BuilderImpl.pushDownNot( not(constant(TruthValue.NO_NULL))).toString()); - assertEquals("NO_NULL", ExpressionBuilder.pushDownNot( + assertEquals("NO_NULL", SearchArgumentImpl.BuilderImpl.pushDownNot( not(constant(TruthValue.YES_NULL))).toString()); - assertEquals("YES_NO_NULL", ExpressionBuilder.pushDownNot( + assertEquals("YES_NO_NULL", SearchArgumentImpl.BuilderImpl.pushDownNot( not(constant(TruthValue.YES_NO_NULL))).toString()); } @Test public void testFlatten() throws Exception { - assertEquals("leaf-1", ExpressionBuilder.flatten(leaf(1)).toString()); + assertEquals("leaf-1", SearchArgumentImpl.BuilderImpl.flatten(leaf(1)).toString()); assertEquals("NO", - ExpressionBuilder.flatten(constant(TruthValue.NO)).toString()); + SearchArgumentImpl.BuilderImpl.flatten(constant(TruthValue.NO)).toString()); assertEquals("(not (not leaf-1))", - ExpressionBuilder.flatten(not(not(leaf(1)))).toString()); + SearchArgumentImpl.BuilderImpl.flatten(not(not(leaf(1)))).toString()); assertEquals("(and leaf-1 leaf-2)", - ExpressionBuilder.flatten(and(leaf(1), leaf(2))).toString()); + SearchArgumentImpl.BuilderImpl.flatten(and(leaf(1), leaf(2))).toString()); assertEquals("(and (or leaf-1 leaf-2) leaf-3)", - ExpressionBuilder.flatten(and(or(leaf(1), leaf(2)), leaf(3)) + SearchArgumentImpl.BuilderImpl.flatten(and(or(leaf(1), leaf(2)), leaf(3)) ).toString()); assertEquals("(and leaf-1 leaf-2 leaf-3 leaf-4)", - ExpressionBuilder.flatten(and(and(leaf(1), leaf(2)), + SearchArgumentImpl.BuilderImpl.flatten(and(and(leaf(1), leaf(2)), and(leaf(3), leaf(4)))).toString()); assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4)", - ExpressionBuilder.flatten(or(leaf(1), or(leaf(2), or(leaf(3), + SearchArgumentImpl.BuilderImpl.flatten(or(leaf(1), or(leaf(2), or(leaf(3), leaf(4))))).toString()); assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4)", - ExpressionBuilder.flatten(or(or(or(leaf(1), leaf(2)), leaf(3)), + SearchArgumentImpl.BuilderImpl.flatten(or(or(or(leaf(1), leaf(2)), leaf(3)), leaf(4))).toString()); assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4 leaf-5 leaf-6)", - ExpressionBuilder.flatten(or(or(leaf(1), or(leaf(2), leaf(3))), + SearchArgumentImpl.BuilderImpl.flatten(or(or(leaf(1), or(leaf(2), leaf(3))), or(or(leaf(4), leaf(5)), leaf(6)))).toString()); assertEquals("(and (not leaf-1) leaf-2 (not leaf-3) leaf-4 (not leaf-5) leaf-6)", - ExpressionBuilder.flatten(and(and(not(leaf(1)), and(leaf(2), + SearchArgumentImpl.BuilderImpl.flatten(and(and(not(leaf(1)), and(leaf(2), not(leaf(3)))), and(and(leaf(4), not(leaf(5))), leaf(6))) ).toString()); assertEquals("(not (and leaf-1 leaf-2 leaf-3))", - ExpressionBuilder.flatten(not(and(leaf(1), and(leaf(2), leaf(3)))) + SearchArgumentImpl.BuilderImpl.flatten(not(and(leaf(1), and(leaf(2), leaf(3)))) ).toString()); } @Test public void testFoldMaybe() throws Exception { assertEquals("(and leaf-1)", - ExpressionBuilder.foldMaybe(and(leaf(1), + SearchArgumentImpl.BuilderImpl.foldMaybe(and(leaf(1), constant(TruthValue.YES_NO_NULL))).toString()); assertEquals("(and leaf-1 leaf-2)", - ExpressionBuilder.foldMaybe(and(leaf(1), + SearchArgumentImpl.BuilderImpl.foldMaybe(and(leaf(1), constant(TruthValue.YES_NO_NULL), leaf(2))).toString()); assertEquals("(and leaf-1 leaf-2)", - ExpressionBuilder.foldMaybe(and(constant(TruthValue.YES_NO_NULL), - leaf(1), leaf(2), constant(TruthValue.YES_NO_NULL))).toString()); + SearchArgumentImpl.BuilderImpl. + foldMaybe(and(constant(TruthValue.YES_NO_NULL), + leaf(1), leaf(2), constant(TruthValue.YES_NO_NULL))).toString()); assertEquals("YES_NO_NULL", - ExpressionBuilder.foldMaybe(and(constant(TruthValue.YES_NO_NULL), - constant(TruthValue.YES_NO_NULL))).toString()); + SearchArgumentImpl.BuilderImpl. + foldMaybe(and(constant(TruthValue.YES_NO_NULL), + constant(TruthValue.YES_NO_NULL))).toString()); assertEquals("YES_NO_NULL", - ExpressionBuilder.foldMaybe(or(leaf(1), - constant(TruthValue.YES_NO_NULL))).toString()); + SearchArgumentImpl.BuilderImpl. + foldMaybe(or(leaf(1), + constant(TruthValue.YES_NO_NULL))).toString()); assertEquals("(or leaf-1 (and leaf-2))", - ExpressionBuilder.foldMaybe(or(leaf(1), + SearchArgumentImpl.BuilderImpl.foldMaybe(or(leaf(1), and(leaf(2), constant(TruthValue.YES_NO_NULL)))).toString()); assertEquals("(and leaf-1)", - ExpressionBuilder.foldMaybe(and(or(leaf(2), + SearchArgumentImpl.BuilderImpl.foldMaybe(and(or(leaf(2), constant(TruthValue.YES_NO_NULL)), leaf(1))).toString()); - assertEquals("(and leaf-100)", ExpressionBuilder.foldMaybe( - ExpressionBuilder.convertToCNF(and(leaf(100), + assertEquals("(and leaf-100)", SearchArgumentImpl.BuilderImpl.foldMaybe( + SearchArgumentImpl.BuilderImpl.convertToCNF(and(leaf(100), or(and(leaf(0), leaf(1)), and(leaf(2), leaf(3)), and(leaf(4), leaf(5)), @@ -195,28 +198,31 @@ public void testFoldMaybe() throws Exception { @Test public void testCNF() throws Exception { - assertEquals("leaf-1", ExpressionBuilder.convertToCNF(leaf(1)).toString()); - assertEquals("NO", ExpressionBuilder.convertToCNF( + assertEquals("leaf-1", SearchArgumentImpl.BuilderImpl.convertToCNF(leaf(1)). + toString()); + assertEquals("NO", SearchArgumentImpl.BuilderImpl.convertToCNF( constant(TruthValue.NO)).toString()); - assertEquals("(not leaf-1)", ExpressionBuilder.convertToCNF( + assertEquals("(not leaf-1)", SearchArgumentImpl.BuilderImpl.convertToCNF( not(leaf(1))).toString()); - assertEquals("(and leaf-1 leaf-2)", ExpressionBuilder.convertToCNF( - and(leaf(1), leaf(2))).toString()); - assertEquals("(or (not leaf-1) leaf-2)", ExpressionBuilder.convertToCNF( - or(not(leaf(1)), leaf(2))).toString()); + assertEquals("(and leaf-1 leaf-2)", SearchArgumentImpl.BuilderImpl. + convertToCNF( + and(leaf(1), leaf(2))).toString()); + assertEquals("(or (not leaf-1) leaf-2)", SearchArgumentImpl.BuilderImpl. + convertToCNF( + or(not(leaf(1)), leaf(2))).toString()); assertEquals("(and (or leaf-1 leaf-2) (not leaf-3))", - ExpressionBuilder.convertToCNF( + SearchArgumentImpl.BuilderImpl.convertToCNF( and(or(leaf(1), leaf(2)), not(leaf(3)))).toString()); assertEquals("(and (or leaf-1 leaf-3) (or leaf-2 leaf-3)" + " (or leaf-1 leaf-4) (or leaf-2 leaf-4))", - ExpressionBuilder.convertToCNF( + SearchArgumentImpl.BuilderImpl.convertToCNF( or(and(leaf(1), leaf(2)), and(leaf(3), leaf(4)))).toString()); assertEquals("(and" + " (or leaf-1 leaf-5) (or leaf-2 leaf-5)" + " (or leaf-3 leaf-5) (or leaf-4 leaf-5)" + " (or leaf-1 leaf-6) (or leaf-2 leaf-6)" + " (or leaf-3 leaf-6) (or leaf-4 leaf-6))", - ExpressionBuilder.convertToCNF( + SearchArgumentImpl.BuilderImpl.convertToCNF( or(and(leaf(1), leaf(2), leaf(3), leaf(4)), and(leaf(5), leaf(6)))).toString()); assertEquals("(and" + @@ -224,7 +230,7 @@ public void testCNF() throws Exception { " (or leaf-5 leaf-6 (not leaf-7) leaf-2 leaf-3)" + " (or leaf-5 leaf-6 (not leaf-7) leaf-1 leaf-4)" + " (or leaf-5 leaf-6 (not leaf-7) leaf-2 leaf-4))", - ExpressionBuilder.convertToCNF( + SearchArgumentImpl.BuilderImpl.convertToCNF( or(and(leaf(1), leaf(2)), and(leaf(3), leaf(4)), or(leaf(5), leaf(6)), @@ -248,33 +254,37 @@ public void testCNF() throws Exception { " (or leaf-8 leaf-0 leaf-5 leaf-7)" + " (or leaf-8 leaf-1 leaf-5 leaf-7)" + " (or leaf-8 leaf-2 leaf-5 leaf-7))", - ExpressionBuilder.convertToCNF(or(and(leaf(0), leaf(1), leaf(2)), + SearchArgumentImpl.BuilderImpl.convertToCNF(or(and(leaf(0), leaf(1), + leaf(2)), and(leaf(3), leaf(4), leaf(5)), and(leaf(6), leaf(7)), leaf(8))).toString()); - assertEquals("YES_NO_NULL", ExpressionBuilder.convertToCNF(or(and(leaf(0), leaf(1)), - and(leaf(2), leaf(3)), - and(leaf(4), leaf(5)), - and(leaf(6), leaf(7)), - and(leaf(8), leaf(9)), - and(leaf(10), leaf(11)), - and(leaf(12), leaf(13)), - and(leaf(14), leaf(15)), - and(leaf(16), leaf(17)))).toString()); - assertEquals("(and leaf-100 YES_NO_NULL)", ExpressionBuilder.convertToCNF(and(leaf(100), - or(and(leaf(0), leaf(1)), - and(leaf(2), leaf(3)), - and(leaf(4), leaf(5)), - and(leaf(6), leaf(7)), - and(leaf(8), leaf(9)), - and(leaf(10), leaf(11)), - and(leaf(12), leaf(13)), - and(leaf(14), leaf(15)), - and(leaf(16), leaf(17))))).toString()); - assertNoSharedNodes(ExpressionBuilder.convertToCNF(or(and(leaf(0), leaf(1), leaf(2)), - and(leaf(3), leaf(4), leaf(5)), - and(leaf(6), leaf(7)), - leaf(8))), Sets.newIdentityHashSet()); + assertEquals("YES_NO_NULL", SearchArgumentImpl.BuilderImpl. + convertToCNF(or(and(leaf(0), leaf(1)), + and(leaf(2), leaf(3)), + and(leaf(4), leaf(5)), + and(leaf(6), leaf(7)), + and(leaf(8), leaf(9)), + and(leaf(10), leaf(11)), + and(leaf(12), leaf(13)), + and(leaf(14), leaf(15)), + and(leaf(16), leaf(17)))).toString()); + assertEquals("(and leaf-100 YES_NO_NULL)", SearchArgumentImpl.BuilderImpl. + convertToCNF(and(leaf(100), + or(and(leaf(0), leaf(1)), + and(leaf(2), leaf(3)), + and(leaf(4), leaf(5)), + and(leaf(6), leaf(7)), + and(leaf(8), leaf(9)), + and(leaf(10), leaf(11)), + and(leaf(12), leaf(13)), + and(leaf(14), leaf(15)), + and(leaf(16), leaf(17))))).toString()); + assertNoSharedNodes(SearchArgumentImpl.BuilderImpl. + convertToCNF(or(and(leaf(0), leaf(1), leaf(2)), + and(leaf(3), leaf(4), leaf(5)), + and(leaf(6), leaf(7)), + leaf(8))), Sets.newIdentityHashSet()); } private static void assertNoSharedNodes(ExpressionTree tree, @@ -310,2596 +320,6 @@ private ExprNodeGenericFuncDesc getFuncDesc(String xmlSerialized) { } } - @Test - public void testExpression1() throws Exception { - // first_name = 'john' or - // 'greg' < first_name or - // 'alan' > first_name or - // id > 12 or - // 13 < id or - // id < 15 or - // 16 > id or - // (id <=> 30 and first_name <=> 'owen') - String exprStr = " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " string \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " john \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " boolean \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " greg \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " alan \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " int \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 12 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 13 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 15 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 16 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 30 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " owen \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n"; - - SearchArgumentImpl sarg = - (SearchArgumentImpl) SearchArgumentFactory.create(getFuncDesc(exprStr)); - List leaves = sarg.getLeaves(); - assertEquals(9, leaves.size()); - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String[] conditions = new String[]{ - "eq(first_name, Binary{\"john\"})", /* first_name = 'john' */ - "not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */ - "lt(first_name, Binary{\"alan\"})", /* 'alan' > first_name */ - "not(lteq(id, 12))", /* id > 12 or */ - "not(lteq(id, 13))", /* 13 < id or */ - "lt(id, 15)", /* id < 15 or */ - "lt(id, 16)", /* 16 > id or */ - "eq(id, 30)", /* id <=> 30 */ - "eq(first_name, Binary{\"owen\"})" /* first_name <=> 'owen' */ - }; - String expected = String - .format("and(or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %8$s), " + - "or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %9$s))", conditions); - assertEquals(expected, p.toString()); - - PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); - assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); - assertEquals("first_name", leaf.getColumnName()); - assertEquals("john", leaf.getLiteral()); - - leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); - assertEquals("first_name", leaf.getColumnName()); - assertEquals("greg", leaf.getLiteral()); - - leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral()); - - leaf = leaves.get(3); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(12, leaf.getLiteral()); - - leaf = leaves.get(4); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(13, leaf.getLiteral()); - - leaf = leaves.get(5); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(15, leaf.getLiteral()); - - leaf = leaves.get(6); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(16, leaf.getLiteral()); - - leaf = leaves.get(7); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(30, leaf.getLiteral()); - - leaf = leaves.get(8); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); - assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); - assertEquals("first_name", leaf.getColumnName()); - assertEquals("owen", leaf.getLiteral()); - - assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + - " (not leaf-4) leaf-5 leaf-6 leaf-7)" + - " (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + - " (not leaf-4) leaf-5 leaf-6 leaf-8))", - sarg.getExpression().toString()); - assertNoSharedNodes(sarg.getExpression(), - Sets.newIdentityHashSet()); - } - - @Test - public void testExpression2() throws Exception { - /* first_name is null or - first_name <> 'sue' or - id >= 12 or - id <= 4; */ - String exprStr = " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " string \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " boolean \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " sue \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " int \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 12 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 4 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n"; - - SearchArgumentImpl sarg = - (SearchArgumentImpl) SearchArgumentFactory.create(getFuncDesc(exprStr)); - List leaves = sarg.getLeaves(); - assertEquals(4, leaves.size()); - - String[] conditions = new String[]{ - "eq(first_name, null)", /* first_name is null */ - "not(eq(first_name, Binary{\"sue\"}))", /* first_name <> 'sue' */ - "not(lt(id, 12))", /* id >= 12 */ - "lteq(id, 4)" /* id <= 4 */ - }; - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions); - assertEquals(expected, p.toString()); - - PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); - assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator()); - assertEquals("first_name", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral()); - assertEquals(null, leaf.getLiteralList()); - - leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); - assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); - assertEquals("first_name", leaf.getColumnName()); - assertEquals("sue", leaf.getLiteral()); - - leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(12, leaf.getLiteral()); - - leaf = leaves.get(3); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(4, leaf.getLiteral()); - - assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)", - sarg.getExpression().toString()); - assertNoSharedNodes(sarg.getExpression(), - Sets.newIdentityHashSet()); - assertEquals(TruthValue.NO, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES, - TruthValue.NO))); - assertEquals(TruthValue.YES, - sarg.evaluate(values(TruthValue.YES, TruthValue.YES, TruthValue.YES, - TruthValue.NO))); - assertEquals(TruthValue.YES, - sarg.evaluate(values(TruthValue.NO, TruthValue.NO, TruthValue.YES, - TruthValue.NO))); - assertEquals(TruthValue.YES, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NO, - TruthValue.NO))); - assertEquals(TruthValue.YES, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES, - TruthValue.YES))); - assertEquals(TruthValue.NULL, - sarg.evaluate(values(TruthValue.NULL, TruthValue.YES, TruthValue.YES, - TruthValue.NO))); - assertEquals(TruthValue.NULL, - sarg.evaluate(values(TruthValue.NO, TruthValue.NULL, TruthValue.YES, - TruthValue.NO))); - assertEquals(TruthValue.NULL, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NULL, - TruthValue.NO))); - assertEquals(TruthValue.NULL, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES, - TruthValue.NULL))); - assertEquals(TruthValue.YES_NO, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES_NO, TruthValue.YES, - TruthValue.YES_NO))); - assertEquals(TruthValue.NO_NULL, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES_NULL, TruthValue.YES, - TruthValue.NO_NULL))); - assertEquals(TruthValue.YES_NULL, - sarg.evaluate(values(TruthValue.YES_NULL, TruthValue.YES_NO_NULL, - TruthValue.YES, TruthValue.NULL))); - assertEquals(TruthValue.YES_NO_NULL, - sarg.evaluate(values(TruthValue.NO_NULL, TruthValue.YES_NO_NULL, - TruthValue.YES, TruthValue.NO))); - } - - @Test - public void testExpression3() throws Exception { - /* (id between 23 and 45) and - first_name = 'alan' and - substr('xxxxx', 3) == first_name and - 'smith' = last_name and - substr(first_name, 3) == 'yyy' */ - String exprStr = " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " boolean \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " false \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " int \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 23 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 45 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " string \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " alan \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " xxxxx \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 3 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " org.apache.hadoop.hive.ql.udf.UDFSubstr \n" + - " \n" + - " \n" + - " substr \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " smith \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " last_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 3 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " org.apache.hadoop.hive.ql.udf.UDFSubstr \n" + - " \n" + - " \n" + - " substr \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " yyy \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n"; - - SearchArgumentImpl sarg = - (SearchArgumentImpl) SearchArgumentFactory.create(getFuncDesc(exprStr)); - List leaves = sarg.getLeaves(); - assertEquals(3, leaves.size()); - - String[] conditions = new String[]{ - "lt(id, 45)", /* id between 23 and 45 */ - "not(lteq(id, 23))", /* id between 23 and 45 */ - "eq(first_name, Binary{\"alan\"})", /* first_name = 'alan' */ - "eq(last_name, Binary{\"smith\"})" /* 'smith' = last_name */ - }; - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions); - assertEquals(expected, p.toString()); - - PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral()); - assertEquals(23, leaf.getLiteralList().get(0)); - assertEquals(45, leaf.getLiteralList().get(1)); - - leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); - assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); - assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral()); - - leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); - assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); - assertEquals("last_name", leaf.getColumnName()); - assertEquals("smith", leaf.getLiteral()); - - assertEquals("(and leaf-0 leaf-1 leaf-2)", - sarg.getExpression().toString()); - assertNoSharedNodes(sarg.getExpression(), - Sets.newIdentityHashSet()); - } - - @Test - public void testExpression4() throws Exception { - /* id <> 12 and - first_name in ('john', 'sue') and - id in (34,50) */ - String exprStr = " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " int \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 12 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " boolean \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " string \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " john \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " sue \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 34 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 50 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - "\n"; - - SearchArgumentImpl sarg = - (SearchArgumentImpl) SearchArgumentFactory.create(getFuncDesc(exprStr)); - List leaves = sarg.getLeaves(); - assertEquals(3, leaves.size()); - - String[] conditions = new String[]{ - "not(eq(id, 12))", /* id <> 12 */ - "or(eq(first_name, Binary{\"john\"}), eq(first_name, Binary{\"sue\"}))", /* first_name in - ('john', 'sue') */ - "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */ - }; - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions); - assertEquals(expected, p.toString()); - - PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(12, leaf.getLiteral()); - - leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); - assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); - assertEquals("first_name", leaf.getColumnName()); - assertEquals("john", leaf.getLiteralList().get(0)); - assertEquals("sue", leaf.getLiteralList().get(1)); - - leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(34, leaf.getLiteralList().get(0)); - assertEquals(50, leaf.getLiteralList().get(1)); - - assertEquals("(and (not leaf-0) leaf-1 leaf-2)", - sarg.getExpression().toString()); - assertNoSharedNodes(sarg.getExpression(), - Sets.newIdentityHashSet()); - assertEquals(TruthValue.YES, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES))); - assertEquals(TruthValue.NULL, - sarg.evaluate(values(TruthValue.NULL, TruthValue.YES, TruthValue.YES))); - assertEquals(TruthValue.NULL, - sarg.evaluate(values(TruthValue.NO, TruthValue.NULL, TruthValue.YES))); - assertEquals(TruthValue.NO, - sarg.evaluate(values(TruthValue.YES, TruthValue.YES, TruthValue.YES))); - assertEquals(TruthValue.NO, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NO))); - assertEquals(TruthValue.NO, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES_NULL, TruthValue.NO))); - assertEquals(TruthValue.NO_NULL, - sarg.evaluate(values(TruthValue.NO, TruthValue.NULL, TruthValue.YES_NO_NULL))); - assertEquals(TruthValue.NO_NULL, - sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NO_NULL))); - } - - @Test - public void testExpression5() throws Exception { - /* (first_name < 'owen' or 'foobar' = substr(last_name, 4)) and - first_name between 'david' and 'greg' */ - String exprStr = " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " string \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " owen \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " boolean \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " foobar \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " last_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " int \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 4 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " org.apache.hadoop.hive.ql.udf.UDFSubstr \n" + - " \n" + - " \n" + - " substr \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " false \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " david \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " greg \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n"; - - SearchArgumentImpl sarg = - (SearchArgumentImpl) SearchArgumentFactory.create(getFuncDesc(exprStr)); - List leaves = sarg.getLeaves(); - assertEquals(1, leaves.size()); - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = - "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))"; - assertEquals(p.toString(), expected); - - assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getType()); - assertEquals(PredicateLeaf.Operator.BETWEEN, - leaves.get(0).getOperator()); - assertEquals("first_name", leaves.get(0).getColumnName()); - - assertEquals("leaf-0", - sarg.getExpression().toString()); - assertNoSharedNodes(sarg.getExpression(), - Sets.newIdentityHashSet()); - } - - @Test - public void testExpression7() throws Exception { - /* (id < 10 and id < 11 and id < 12) or (id < 13 and id < 14 and id < 15) or - (id < 16 and id < 17) or id < 18 */ - String exprStr = " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " int \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 10 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " boolean \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 11 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 12 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 13 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 14 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 15 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 16 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 17 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 18 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - SearchArgumentImpl sarg = - (SearchArgumentImpl) SearchArgumentFactory.create(getFuncDesc(exprStr)); - List leaves = sarg.getLeaves(); - assertEquals(9, leaves.size()); - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" + - "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " + - "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " + - "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 16))), " + - "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 16))), " + - "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 16))), " + - "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 16))), " + - "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 16))), " + - "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 16))), " + - "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 16))), " + - "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 17))), " + - "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 17))), " + - "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 17))), " + - "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 17))), " + - "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 17))), " + - "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 17))), " + - "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 17))), " + - "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 17))), " + - "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 17)))"; - assertEquals(p.toString(), expected); - - PredicateLeaf leaf = leaves.get(0); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(18, leaf.getLiteral()); - - leaf = leaves.get(1); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(10, leaf.getLiteral()); - - leaf = leaves.get(2); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(13, leaf.getLiteral()); - - leaf = leaves.get(3); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(16, leaf.getLiteral()); - - leaf = leaves.get(4); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(11, leaf.getLiteral()); - - leaf = leaves.get(5); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(12, leaf.getLiteral()); - - leaf = leaves.get(6); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(14, leaf.getLiteral()); - - leaf = leaves.get(7); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(15, leaf.getLiteral()); - - leaf = leaves.get(8); - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); - assertEquals("id", leaf.getColumnName()); - assertEquals(17, leaf.getLiteral()); - - assertEquals("(and" + - " (or leaf-0 leaf-1 leaf-2 leaf-3)" + - " (or leaf-0 leaf-4 leaf-2 leaf-3)" + - " (or leaf-0 leaf-5 leaf-2 leaf-3)" + - " (or leaf-0 leaf-1 leaf-6 leaf-3)" + - " (or leaf-0 leaf-4 leaf-6 leaf-3)" + - " (or leaf-0 leaf-5 leaf-6 leaf-3)" + - " (or leaf-0 leaf-1 leaf-7 leaf-3)" + - " (or leaf-0 leaf-4 leaf-7 leaf-3)" + - " (or leaf-0 leaf-5 leaf-7 leaf-3)" + - " (or leaf-0 leaf-1 leaf-2 leaf-8)" + - " (or leaf-0 leaf-4 leaf-2 leaf-8)" + - " (or leaf-0 leaf-5 leaf-2 leaf-8)" + - " (or leaf-0 leaf-1 leaf-6 leaf-8)" + - " (or leaf-0 leaf-4 leaf-6 leaf-8)" + - " (or leaf-0 leaf-5 leaf-6 leaf-8)" + - " (or leaf-0 leaf-1 leaf-7 leaf-8)" + - " (or leaf-0 leaf-4 leaf-7 leaf-8)" + - " (or leaf-0 leaf-5 leaf-7 leaf-8))", - sarg.getExpression().toString()); - } - - @Test - public void testExpression8() throws Exception { - /* first_name = last_name */ - String exprStr = " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " first_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " string \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " last_name \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " boolean \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " "; - - SearchArgumentImpl sarg = - (SearchArgumentImpl) SearchArgumentFactory.create(getFuncDesc(exprStr)); - List leaves = sarg.getLeaves(); - assertEquals(0, leaves.size()); - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - assertNull(p); - - assertEquals("YES_NO_NULL", - sarg.getExpression().toString()); - } - - @Test - public void testExpression9() throws Exception { - /* first_name = last_name */ - String exprStr = " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " int \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 1 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 3 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " true \n" + - " \n" + - " \n" + - " org.apache.hadoop.hive.ql.udf.UDFOPPlus \n" + - " \n" + - " \n" + - " + \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 4 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " true \n" + - " \n" + - " \n" + - " org.apache.hadoop.hive.ql.udf.UDFOPPlus \n" + - " \n" + - " \n" + - " + \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " boolean \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " "; - - SearchArgumentImpl sarg = - (SearchArgumentImpl) SearchArgumentFactory.create(getFuncDesc(exprStr)); - List leaves = sarg.getLeaves(); - assertEquals(0, leaves.size()); - - assertEquals("YES_NO_NULL", - sarg.getExpression().toString()); - assertEquals(TruthValue.YES_NO_NULL, sarg.evaluate(values())); - } - - @Test - public void testExpression10() throws Exception { - /* id >= 10 and not (10 > id) */ - String exprStr = " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " int \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 10 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " boolean \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " id \n" + - " \n" + - " \n" + - " orc_people \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " 10 \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - SearchArgumentImpl sarg = - (SearchArgumentImpl) SearchArgumentFactory.create(getFuncDesc(exprStr)); - List leaves = sarg.getLeaves(); - assertEquals(1, leaves.size()); - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = "and(not(lt(id, 10)), not(lt(id, 10)))"; - assertEquals(expected, p.toString()); - - assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType()); - assertEquals(PredicateLeaf.Operator.LESS_THAN, - leaves.get(0).getOperator()); - assertEquals("id", leaves.get(0).getColumnName()); - assertEquals(10, leaves.get(0).getLiteral()); - - assertEquals("(and (not leaf-0) (not leaf-0))", - sarg.getExpression().toString()); - assertNoSharedNodes(sarg.getExpression(), - Sets.newIdentityHashSet()); - assertEquals(TruthValue.NO, sarg.evaluate(values(TruthValue.YES))); - assertEquals(TruthValue.YES, sarg.evaluate(values(TruthValue.NO))); - assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NULL))); - assertEquals(TruthValue.NO_NULL, sarg.evaluate(values(TruthValue.YES_NULL))); - assertEquals(TruthValue.YES_NULL, sarg.evaluate(values(TruthValue.NO_NULL))); - assertEquals(TruthValue.YES_NO, sarg.evaluate(values(TruthValue.YES_NO))); - assertEquals(TruthValue.YES_NO_NULL, sarg.evaluate(values(TruthValue.YES_NO_NULL))); - } - private static TruthValue[] values(TruthValue... vals) { return vals; } @@ -2909,9 +329,9 @@ public void testBuilder() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("x", 10) - .lessThanEquals("y", "hi") - .equals("z", 1.0) + .lessThan("x", PredicateLeaf.Type.INTEGER, 10) + .lessThanEquals("y", PredicateLeaf.Type.STRING, "hi") + .equals("z", PredicateLeaf.Type.FLOAT, 1.0) .end() .build(); assertEquals("leaf-0 = (LESS_THAN x 10)\n" + @@ -2921,10 +341,10 @@ public void testBuilder() throws Exception { sarg = SearchArgumentFactory.newBuilder() .startNot() .startOr() - .isNull("x") - .between("y", 10, 20) - .in("z", 1, 2, 3) - .nullSafeEquals("a", "stinger") + .isNull("x", PredicateLeaf.Type.INTEGER) + .between("y", PredicateLeaf.Type.INTEGER, 10, 20) + .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) + .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger") .end() .end() .build(); @@ -2933,12 +353,6 @@ public void testBuilder() throws Exception { "leaf-2 = (IN z 1 2 3)\n" + "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = - "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " + - "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))"; - assertEquals(expected, p.toString()); } @Test @@ -2946,38 +360,36 @@ public void testBuilderComplexTypes() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("x", new DateWritable(10)) - .lessThanEquals("y", new HiveChar("hi", 10)) - .equals("z", HiveDecimal.create("1.0")) + .lessThan("x", PredicateLeaf.Type.DATE, + Date.valueOf("1970-1-11")) + .lessThanEquals("y", PredicateLeaf.Type.STRING, + new HiveChar("hi", 10).toString()) + .equals("z", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("1.0")) .end() .build(); assertEquals("leaf-0 = (LESS_THAN x 1970-01-11)\n" + - "leaf-1 = (LESS_THAN_EQUALS y hi)\n" + + "leaf-1 = (LESS_THAN_EQUALS y hi )\n" + "leaf-2 = (EQUALS z 1)\n" + "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString()); - assertEquals("lteq(y, Binary{\"hi\"})", - ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString()); sarg = SearchArgumentFactory.newBuilder() .startNot() .startOr() - .isNull("x") - .between("y", HiveDecimal.create(10), 20.0) - .in("z", (byte) 1, (short) 2, (int) 3) - .nullSafeEquals("a", new HiveVarchar("stinger", 100)) + .isNull("x", PredicateLeaf.Type.INTEGER) + .between("y", PredicateLeaf.Type.DECIMAL, + new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0")) + .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) + .nullSafeEquals("a", PredicateLeaf.Type.STRING, + new HiveVarchar("stinger", 100).toString()) .end() .end() .build(); assertEquals("leaf-0 = (IS_NULL x)\n" + - "leaf-1 = (BETWEEN y 10 20.0)\n" + + "leaf-1 = (BETWEEN y 10 20)\n" + "leaf-2 = (IN z 1 2 3)\n" + "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + - "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + - "not(eq(a, Binary{\"stinger\"})))"; - assertEquals(expected, p.toString()); + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", + sarg.toString()); } @Test @@ -2985,38 +397,36 @@ public void testBuilderComplexTypes2() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("x", new DateWritable(10)) - .lessThanEquals("y", new HiveChar("hi", 10)) - .equals("z", new BigDecimal("1.0")) + .lessThan("x", PredicateLeaf.Type.DATE, Date.valueOf("2005-3-12")) + .lessThanEquals("y", PredicateLeaf.Type.STRING, + new HiveChar("hi", 10).toString()) + .equals("z", PredicateLeaf.Type.DECIMAL, + new HiveDecimalWritable("1.0")) .end() .build(); - assertEquals("leaf-0 = (LESS_THAN x 1970-01-11)\n" + - "leaf-1 = (LESS_THAN_EQUALS y hi)\n" + - "leaf-2 = (EQUALS z 1.0)\n" + + assertEquals("leaf-0 = (LESS_THAN x 2005-03-12)\n" + + "leaf-1 = (LESS_THAN_EQUALS y hi )\n" + + "leaf-2 = (EQUALS z 1)\n" + "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString()); - assertEquals("lteq(y, Binary{\"hi\"})", - ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString()); sarg = SearchArgumentFactory.newBuilder() .startNot() .startOr() - .isNull("x") - .between("y", new BigDecimal(10), 20.0) - .in("z", (byte) 1, (short) 2, (int) 3) - .nullSafeEquals("a", new HiveVarchar("stinger", 100)) + .isNull("x", PredicateLeaf.Type.INTEGER) + .between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"), + new HiveDecimalWritable("20.0")) + .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) + .nullSafeEquals("a", PredicateLeaf.Type.STRING, + new HiveVarchar("stinger", 100).toString()) .end() .end() .build(); assertEquals("leaf-0 = (IS_NULL x)\n" + - "leaf-1 = (BETWEEN y 10 20.0)\n" + + "leaf-1 = (BETWEEN y 10 20)\n" + "leaf-2 = (IN z 1 2 3)\n" + "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + - "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + - "not(eq(a, Binary{\"stinger\"})))"; - assertEquals(expected, p.toString()); + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", + sarg.toString()); } @Test @@ -3024,24 +434,20 @@ public void testBuilderFloat() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("x", new Short((short) 22)) - .lessThan("x1", new Integer(22)) - .lessThanEquals("y", new HiveChar("hi", 10)) - .equals("z", new Float("0.22")) - .equals("z1", new Double(0.22)) + .lessThan("x", PredicateLeaf.Type.INTEGER, new Integer((short) 22)) + .lessThan("x1", PredicateLeaf.Type.INTEGER, new Integer(22)) + .lessThanEquals("y", PredicateLeaf.Type.STRING, + new HiveChar("hi", 10).toString()) + .equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22)) + .equals("z1", PredicateLeaf.Type.FLOAT, new Double(0.22)) .end() .build(); assertEquals("leaf-0 = (LESS_THAN x 22)\n" + "leaf-1 = (LESS_THAN x1 22)\n" + - "leaf-2 = (LESS_THAN_EQUALS y hi)\n" + + "leaf-2 = (LESS_THAN_EQUALS y hi )\n" + "leaf-3 = (EQUALS z 0.22)\n" + "leaf-4 = (EQUALS z1 0.22)\n" + "expr = (and leaf-0 leaf-1 leaf-2 leaf-3 leaf-4)", sarg.toString()); - - FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); - String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)), lteq(y, Binary{\"hi\"})), eq(z, " + - "0.22)), eq(z1, 0.22))"; - assertEquals(expected, p.toString()); } @Test @@ -3054,12 +460,12 @@ public void testTimestampSerialization() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("x", now) + .lessThan("x", PredicateLeaf.Type.TIMESTAMP, now) .end() .build(); String serializedSarg = sarg.toKryo(); - SearchArgument sarg2 = SearchArgumentImpl.fromKryo(serializedSarg); + SearchArgument sarg2 = ConvertAstToSearchArg.create(serializedSarg); Field literalField = PredicateLeafImpl.class.getDeclaredField("literal"); literalField.setAccessible(true); diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java index 0a95363..3a92565 100644 --- serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java +++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java @@ -18,6 +18,10 @@ package org.apache.hadoop.hive.ql.io.sarg; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import java.sql.Date; +import java.sql.Timestamp; import java.util.List; /** @@ -43,14 +47,27 @@ * The possible types for sargs. */ public static enum Type { - INTEGER, // all of the integer types except long - LONG, - FLOAT, // float and double - STRING, // string, char, varchar - DATE, - DECIMAL, - TIMESTAMP, - BOOLEAN + INTEGER(Integer.class), // all of the integer types except long + LONG(Long.class), + FLOAT(Double.class), // float and double + STRING(String.class), // string, char, varchar + DATE(Date.class), + DECIMAL(HiveDecimalWritable.class), + TIMESTAMP(Timestamp.class), + BOOLEAN(Boolean.class); + + private final Class cls; + Type(Class cls) { + this.cls = cls; + } + + /** + * For all SARG leaves, the values must be the matching class. + * @return the value class + */ + public Class getValueClass() { + return cls; + } } /** diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java index 84604cb..bc0d503 100644 --- serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java +++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java @@ -215,58 +215,78 @@ public boolean isNeeded() { /** * Add a less than leaf to the current item on the stack. * @param column the name of the column + * @param type the type of the expression * @param literal the literal * @return this */ - public Builder lessThan(String column, Object literal); + public Builder lessThan(String column, PredicateLeaf.Type type, + Object literal); /** * Add a less than equals leaf to the current item on the stack. * @param column the name of the column + * @param type the type of the expression * @param literal the literal * @return this */ - public Builder lessThanEquals(String column, Object literal); + public Builder lessThanEquals(String column, PredicateLeaf.Type type, + Object literal); /** * Add an equals leaf to the current item on the stack. * @param column the name of the column + * @param type the type of the expression * @param literal the literal * @return this */ - public Builder equals(String column, Object literal); + public Builder equals(String column, PredicateLeaf.Type type, + Object literal); /** * Add a null safe equals leaf to the current item on the stack. * @param column the name of the column + * @param type the type of the expression * @param literal the literal * @return this */ - public Builder nullSafeEquals(String column, Object literal); + public Builder nullSafeEquals(String column, PredicateLeaf.Type type, + Object literal); /** * Add an in leaf to the current item on the stack. * @param column the name of the column + * @param type the type of the expression * @param literal the literal * @return this */ - public Builder in(String column, Object... literal); + public Builder in(String column, PredicateLeaf.Type type, + Object... literal); /** * Add an is null leaf to the current item on the stack. * @param column the name of the column + * @param type the type of the expression * @return this */ - public Builder isNull(String column); + public Builder isNull(String column, PredicateLeaf.Type type); /** * Add a between leaf to the current item on the stack. * @param column the name of the column + * @param type the type of the expression * @param lower the literal * @param upper the literal * @return this */ - public Builder between(String column, Object lower, Object upper); + public Builder between(String column, PredicateLeaf.Type type, + Object lower, Object upper); + + /** + * Add a truth value to the expression. + * @param truth + * @return this + */ + public Builder literal(TruthValue truth); /** * Build and return the SearchArgument that has been defined. All of the diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java index a17d2cc..885828a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java @@ -46,6 +46,10 @@ public HiveDecimalWritable() { } + public HiveDecimalWritable(String value) { + set(HiveDecimal.create(value)); + } + public HiveDecimalWritable(byte[] bytes, int scale) { set(bytes, scale); } @@ -58,6 +62,10 @@ public HiveDecimalWritable(HiveDecimal value) { set(value); } + public HiveDecimalWritable(long value) { + set((HiveDecimal.create(value))); + } + public void set(HiveDecimal value) { set(value.unscaledValue().toByteArray(), value.scale()); }