diff --git pom.xml pom.xml
index 793ea86..96b1167 100644
--- pom.xml
+++ pom.xml
@@ -146,7 +146,7 @@
requires netty < 3.6.0 we force hadoops version
-->
3.4.0.Final
- <parquet.version>1.5.0</parquet.version>
+ <parquet.version>1.6.0rc3</parquet.version>
0.12.0
2.5.0
1.0.1
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index a6a0ec1..6be3243 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -2388,7 +2388,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
// the stats object is converted to text and comparison is performed.
// When STRINGs are converted to other base types, NumberFormat exception
// can occur in which case TruthValue.YES_NO_NULL value is returned
- Object baseObj = predicate.getLiteral();
+ Object baseObj = predicate.getLiteral(PredicateLeaf.FileFormat.ORC);
Object minValue = getConvertedStatsObj(min, baseObj);
Object maxValue = getConvertedStatsObj(max, baseObj);
Object predObj = getBaseObjectForComparison(baseObj, minValue);
@@ -2432,7 +2432,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
if (minValue.equals(maxValue)) {
// for a single value, look through to see if that value is in the
// set
- for (Object arg : predicate.getLiteralList()) {
+ for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
predObj = getBaseObjectForComparison(arg, minValue);
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN) {
@@ -2442,7 +2442,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
return TruthValue.NO_NULL;
} else {
// are all of the values outside of the range?
- for (Object arg : predicate.getLiteralList()) {
+ for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
predObj = getBaseObjectForComparison(arg, minValue);
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN || loc == Location.MIDDLE ||
@@ -2453,7 +2453,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
return TruthValue.NO_NULL;
}
case BETWEEN:
- List<Object> args = predicate.getLiteralList();
+ List<Object> args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC);
Object predObj1 = getBaseObjectForComparison(args.get(0), minValue);
loc = compareToRange((Comparable) predObj1, minValue, maxValue);
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java
new file mode 100644
index 0000000..2797654
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import parquet.filter2.predicate.FilterApi;
+import parquet.filter2.predicate.FilterPredicate;
+
+import static parquet.filter2.predicate.FilterApi.not;
+import static parquet.filter2.predicate.FilterApi.or;
+
+/**
+ * The base class for building parquet-supported filter predicates for primitive types.
+ */
+public abstract class FilterPredicateLeafBuilder {
+
+ /**
+ * Build filter predicate with multiple constants
+ *
+ * @param op IN or BETWEEN
+ * @param literals
+ * @param columnName
+ * @return
+ */
+ public FilterPredicate buildPredicate(PredicateLeaf.Operator op, List<Object> literals,
+ String columnName) throws Exception {
+ FilterPredicate result = null;
+ switch (op) {
+ case IN:
+ for (Object literal : literals) {
+ if (result == null) {
+ result = buildPredict(PredicateLeaf.Operator.EQUALS, literal, columnName);
+ } else {
+ result = or(result, buildPredict(PredicateLeaf.Operator.EQUALS, literal,
+ columnName));
+ }
+ }
+ return result;
+ case BETWEEN:
+ if (literals.size() != 2) {
+ throw new RuntimeException(
+ "Unable to build a 'between' filter: expected exactly two literals but got " + literals);
+ }
+ Object min = literals.get(0);
+ Object max = literals.get(1);
+ FilterPredicate lt = not(buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS,
+ min, columnName));
+ FilterPredicate gt = buildPredict(PredicateLeaf.Operator.LESS_THAN, max, columnName);
+ result = FilterApi.and(gt, lt);
+ return result;
+ default:
+ throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+ }
+ }
+
+ /**
+ * Build predicate with a single constant
+ *
+ * @param op EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL
+ * @param constant
+ * @param columnName
+ * @return a FilterPredicate, or null if no filter can be built (meaning no filter will be applied)
+ */
+ public abstract FilterPredicate buildPredict(PredicateLeaf.Operator op, Object constant,
+ String columnName) throws Exception;
+}
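For orientation, the two multi-literal cases above reduce IN to a chain of or(eq(...)) and BETWEEN to and(lt(max), not(ltEq(min))). A minimal, standalone sketch of those shapes built directly against parquet's FilterApi; the class name and the integer column `id` are illustrative, not part of the patch:

    import static parquet.filter2.predicate.FilterApi.*;

    import parquet.filter2.predicate.FilterPredicate;

    public class MultiLiteralShapes {
      public static void main(String[] args) {
        // IN (1, 2, 3): a left-leaning chain of or(eq(...), eq(...)), as the IN case above composes it
        FilterPredicate in =
            or(or(eq(intColumn("id"), 1), eq(intColumn("id"), 2)), eq(intColumn("id"), 3));

        // BETWEEN 10 AND 20: and(lt(col, max), not(ltEq(col, min))), as the BETWEEN case above composes it
        FilterPredicate between =
            and(lt(intColumn("id"), 20), not(ltEq(intColumn("id"), 10)));

        System.out.println(in);      // or(or(eq(id, 1), eq(id, 2)), eq(id, 3))
        System.out.println(between); // and(lt(id, 20), not(lteq(id, 10)))
      }
    }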
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
new file mode 100644
index 0000000..83865e8
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
@@ -0,0 +1,169 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
+
+import parquet.filter2.predicate.FilterApi;
+import parquet.filter2.predicate.FilterPredicate;
+import parquet.io.api.Binary;
+
+import static parquet.filter2.predicate.FilterApi.eq;
+import static parquet.filter2.predicate.FilterApi.lt;
+import static parquet.filter2.predicate.FilterApi.ltEq;
+import static parquet.filter2.predicate.FilterApi.binaryColumn;
+import static parquet.filter2.predicate.FilterApi.booleanColumn;
+import static parquet.filter2.predicate.FilterApi.doubleColumn;
+import static parquet.filter2.predicate.FilterApi.intColumn;
+
+public class LeafFilterFactory {
+ private static final Log LOG = LogFactory.getLog(LeafFilterFactory.class);
+
+ class IntFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+ /**
+ * @param op one of EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL
+ * @param literal
+ * @param columnName
+ * @return
+ */
+ @Override
+ public FilterPredicate buildPredict(Operator op, Object literal,
+ String columnName) {
+ switch (op) {
+ case LESS_THAN:
+ return lt(intColumn(columnName), ((Number) literal).intValue());
+ case IS_NULL:
+ case EQUALS:
+ case NULL_SAFE_EQUALS:
+ return eq(intColumn(columnName),
+ (literal == null) ? null : ((Number) literal).intValue());
+ case LESS_THAN_EQUALS:
+ return ltEq(intColumn(columnName), ((Number) literal).intValue());
+ default:
+ throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+ }
+ }
+ }
+
+ class LongFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+ @Override
+ public FilterPredicate buildPredict(Operator op, Object constant,
+ String columnName) {
+ switch (op) {
+ case LESS_THAN:
+ return lt(FilterApi.longColumn(columnName), ((Number) constant).longValue());
+ case IS_NULL:
+ case EQUALS:
+ case NULL_SAFE_EQUALS:
+ return eq(FilterApi.longColumn(columnName),
+ (constant == null) ? null : ((Number) constant).longValue());
+ case LESS_THAN_EQUALS:
+ return ltEq(FilterApi.longColumn(columnName),
+ ((Number) constant).longValue());
+ default:
+ throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+ }
+ }
+ }
+
+ class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+
+ @Override
+ public FilterPredicate buildPredict(Operator op, Object constant,
+ String columnName) {
+ switch (op) {
+ case LESS_THAN:
+ return lt(doubleColumn(columnName), ((Number) constant).doubleValue());
+ case IS_NULL:
+ case EQUALS:
+ case NULL_SAFE_EQUALS:
+ return eq(doubleColumn(columnName),
+ (constant == null) ? null : ((Number) constant).doubleValue());
+ case LESS_THAN_EQUALS:
+ return ltEq(FilterApi.doubleColumn(columnName),
+ ((Number) constant).doubleValue());
+ default:
+ throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+ }
+ }
+ }
+
+ class BooleanFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+ @Override
+ public FilterPredicate buildPredict(Operator op, Object constant,
+ String columnName) throws Exception{
+ switch (op) {
+ case IS_NULL:
+ case EQUALS:
+ case NULL_SAFE_EQUALS:
+ return eq(booleanColumn(columnName),
+ (constant == null) ? null : ((Boolean) constant).booleanValue());
+ default:
+ throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+ }
+ }
+ }
+
+ class BinaryFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+ @Override
+ public FilterPredicate buildPredict(Operator op, Object constant,
+ String columnName) throws Exception{
+ switch (op) {
+ case LESS_THAN:
+ return lt(binaryColumn(columnName), Binary.fromString((String) constant));
+ case IS_NULL:
+ case EQUALS:
+ case NULL_SAFE_EQUALS:
+ return eq(binaryColumn(columnName),
+ (constant == null) ? null : Binary.fromString((String) constant));
+ case LESS_THAN_EQUALS:
+ return ltEq(binaryColumn(columnName), Binary.fromString((String) constant));
+ default:
+ // should never be executed
+ throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
+ }
+ }
+ }
+
+ /**
+ * Get the leaf filter builder for the given predicate leaf type; DATE, DECIMAL and TIMESTAMP
+ * are not supported yet.
+ * @param type the PredicateLeaf.Type of the leaf
+ * @return a builder for the type, or null if the type cannot be pushed down to parquet
+ */
+ public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type){
+ switch (type){
+ case INTEGER:
+ return new IntFilterPredicateLeafBuilder();
+ case LONG:
+ return new LongFilterPredicateLeafBuilder();
+ case FLOAT: // float and double
+ return new DoubleFilterPredicateLeafBuilder();
+ case STRING: // string, char, varchar
+ return new BinaryFilterPredicateLeafBuilder();
+ case BOOLEAN:
+ return new BooleanFilterPredicateLeafBuilder();
+ case DATE:
+ case DECIMAL:
+ case TIMESTAMP:
+ default:
+ LOG.debug("Conversion to Parquet FilterPredicate not supported for " + type);
+ return null;
+ }
+ }
+}
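A short usage sketch of the factory: the INTEGER builder emits int-typed parquet predicates, while an unsupported type such as DECIMAL yields a null builder so the caller can drop that leaf. The class name and column name below are illustrative:

    import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
    import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;

    import parquet.filter2.predicate.FilterPredicate;

    public class LeafFilterFactorySketch {
      public static void main(String[] args) throws Exception {
        LeafFilterFactory factory = new LeafFilterFactory();

        FilterPredicateLeafBuilder builder =
            factory.getLeafFilterBuilderByType(PredicateLeaf.Type.INTEGER);
        FilterPredicate p = builder.buildPredict(PredicateLeaf.Operator.LESS_THAN, 15, "id");
        System.out.println(p); // lt(id, 15)

        // DATE, DECIMAL and TIMESTAMP are not translated; null tells the caller to skip the leaf.
        System.out.println(factory.getLeafFilterBuilderByType(PredicateLeaf.Type.DECIMAL)); // null
      }
    }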
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
index f5da46d..4e4d7fd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
@@ -20,8 +20,12 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileSplit;
@@ -32,6 +36,7 @@
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
+import parquet.filter2.predicate.FilterPredicate;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.ParquetInputFormat;
import parquet.hadoop.ParquetInputSplit;
@@ -83,6 +88,8 @@ public ParquetRecordReaderWrapper(
taskAttemptID = new TaskAttemptID();
}
+ setFilter(oldJobConf);
+
// create a TaskInputOutputContext
final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(oldJobConf, taskAttemptID);
@@ -110,6 +117,27 @@ public ParquetRecordReaderWrapper(
}
}
+ public void setFilter(final JobConf conf) {
+ String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+ String columnNamesString =
+ conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
+ if (serializedPushdown == null || columnNamesString == null || serializedPushdown.isEmpty() ||
+ columnNamesString.isEmpty()) {
+ return;
+ }
+
+ FilterPredicate p =
+ SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown))
+ .toFilterPredicate();
+ if (p != null) {
+ LOG.debug("Predicate filter for parquet is " + p.toString());
+ ParquetInputFormat.setFilterPredicate(conf, p);
+ } else {
+ LOG.debug("No predicate filter can be generated for " + TableScanDesc.FILTER_EXPR_CONF_STR +
+ " with the value of " + serializedPushdown);
+ }
+ }
+
@Override
public void close() throws IOException {
if (realReader != null) {
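setFilter() only consumes what the planner already left in the job conf (hive.io.filter.expr.serialized plus the projected column names). A condensed sketch of the same hand-off, assuming the SearchArgument is built through the public builder rather than a deserialized ExprNodeGenericFuncDesc; the class and column names are illustrative:

    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
    import org.apache.hadoop.mapred.JobConf;

    import parquet.filter2.predicate.FilterPredicate;
    import parquet.hadoop.ParquetInputFormat;

    public class PushdownHandOffSketch {
      public static void main(String[] args) {
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
              .lessThan("id", 15)
              .equals("first_name", "john")
            .end()
            .build();

        FilterPredicate p = sarg.toFilterPredicate();
        if (p != null) { // null would mean nothing is pushable; scan without a filter
          JobConf conf = new JobConf();
          ParquetInputFormat.setFilterPredicate(conf, p);
          System.out.println(p); // e.g. and(lt(id, 15), eq(first_name, Binary{"john"}))
        }
      }
    }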
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
index eeb9641..f2f311f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
@@ -18,6 +18,10 @@
package org.apache.hadoop.hive.ql.io.sarg;
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.ArrayDeque;
@@ -30,9 +34,13 @@
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
+import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -56,14 +64,14 @@
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import com.esotericsoftware.kryo.Kryo;
-import com.esotericsoftware.kryo.io.Input;
-import com.esotericsoftware.kryo.io.Output;
+import parquet.filter2.predicate.FilterApi;
+import parquet.filter2.predicate.FilterPredicate;
/**
* The implementation of SearchArguments.
*/
final class SearchArgumentImpl implements SearchArgument {
+ public static final Log LOG = LogFactory.getLog(SearchArgumentImpl.class);
static final class PredicateLeafImpl implements PredicateLeaf {
private final Operator operator;
@@ -98,7 +106,7 @@ public Operator getOperator() {
}
@Override
- public Type getType() {
+ public Type getType(){
return type;
}
@@ -108,18 +116,55 @@ public String getColumnName() {
}
@Override
- public Object getLiteral() {
+ public Object getLiteral(FileFormat format) {
// To get around a kryo 2.22 bug while deserialize a Timestamp into Date
// (https://github.com/EsotericSoftware/kryo/issues/88)
// When we see a Date, convert back into Timestamp
if (literal instanceof java.util.Date) {
- return new Timestamp(((java.util.Date)literal).getTime());
+ return new Timestamp(((java.util.Date) literal).getTime());
+ }
+
+ switch (format) {
+ case ORC:
+ // adapt base type to what orc needs
+ if (literal instanceof Integer) {
+ return Long.valueOf(literal.toString());
+ }
+ return literal;
+ case PARQUET:
+ return literal;
+ default:
+ throw new RuntimeException(
+ "File format " + format + " is not supported for building search arguments");
}
- return literal;
}
@Override
- public List<Object> getLiteralList() {
+ public List<Object> getLiteralList(FileFormat format) {
+ switch (format) {
+ case ORC:
+ return getOrcLiteralList();
+ case PARQUET:
+ return getParquetLiteralList();
+ default:
+ throw new RuntimeException("File format " + format + " is not supported for building search arguments");
+ }
+ }
+
+ private List<Object> getOrcLiteralList() {
+ // no need to cast
+ if (literalList == null || literalList.size() == 0 || !(literalList.get(0) instanceof
+ Integer)) {
+ return literalList;
+ }
+ List<Object> result = new ArrayList<Object>();
+ for (Object o : literalList) {
+ result.add(Long.valueOf(o.toString()));
+ }
+ return result;
+ }
+
+ private List<Object> getParquetLiteralList() {
return literalList;
}
@@ -254,6 +299,76 @@ TruthValue evaluate(TruthValue[] leaves) {
}
}
+ FilterPredicate translate(List<PredicateLeaf> leafs) {
+ FilterPredicate p = null;
+ switch (operator) {
+ case OR:
+ for(ExpressionTree child: children) {
+ if (p == null) {
+ p = child.translate(leafs);
+ } else {
+ FilterPredicate right = child.translate(leafs);
+ // a null child means that subtree is a constant (no filter); skip it
+ if(right != null){
+ p = FilterApi.or(p, right);
+ }
+ }
+ }
+ return p;
+ case AND:
+ for(ExpressionTree child: children) {
+ if (p == null) {
+ p = child.translate(leafs);
+ } else {
+ FilterPredicate right = child.translate(leafs);
+ // a null child means that subtree is a constant (no filter); skip it
+ if(right != null){
+ p = FilterApi.and(p, right);
+ }
+ }
+ }
+ return p;
+ case NOT:
+ FilterPredicate op = children.get(0).translate(leafs);
+ if (op != null) {
+ return FilterApi.not(op);
+ } else {
+ return null;
+ }
+ case LEAF:
+ return buildFilterPredicateFromPredicateLeaf(leafs.get(leaf));
+ case CONSTANT:
+ return null; // no filter is generated for a constant subtree
+ default:
+ throw new IllegalStateException("Unknown operator: " + operator);
+ }
+ }
+
+ private FilterPredicate buildFilterPredicateFromPredicateLeaf(PredicateLeaf leaf) {
+ LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
+ FilterPredicateLeafBuilder builder;
+ try {
+ builder = leafFilterFactory
+ .getLeafFilterBuilderByType(leaf.getType());
+ if (builder == null) return null;
+ if (isMultiLiteralsOperator(leaf.getOperator())) {
+ return builder.buildPredicate(leaf.getOperator(), leaf.getLiteralList(
+ PredicateLeaf.FileFormat.PARQUET), leaf.getColumnName());
+ } else {
+ return builder
+ .buildPredict(leaf.getOperator(), leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET),
+ leaf.getColumnName());
+ }
+ } catch (Exception e) {
+ LOG.error("Failed to build parquet filter predicate from leaf: " + e, e);
+ return null;
+ }
+ }
+
+ private boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) {
+ return (op == PredicateLeaf.Operator.IN) || (op == PredicateLeaf.Operator.BETWEEN);
+ }
+
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
@@ -314,8 +429,9 @@ Operator getOperator() {
case BYTE:
case SHORT:
case INT:
- case LONG:
return PredicateLeaf.Type.INTEGER;
+ case LONG:
+ return PredicateLeaf.Type.LONG;
case CHAR:
case VARCHAR:
case STRING:
@@ -360,6 +476,8 @@ private static String getColumnName(ExprNodeGenericFuncDesc expr,
private static Object boxLiteral(ExprNodeConstantDesc lit) {
switch (getType(lit)) {
case INTEGER:
+ return ((Number) lit.getValue()).intValue();
+ case LONG:
return ((Number) lit.getValue()).longValue();
case STRING:
return StringUtils.stripEnd(lit.getValue().toString(), null);
@@ -420,6 +538,7 @@ private ExpressionTree createLeaf(PredicateLeaf.Operator operator,
if (type == null) {
return new ExpressionTree(TruthValue.YES_NO_NULL);
}
+
Object literal = null;
List literalList = null;
switch (operator) {
@@ -903,6 +1022,11 @@ static SearchArgument fromKryo(String value) {
return new Kryo().readObject(input, SearchArgumentImpl.class);
}
+ @Override
+ public FilterPredicate toFilterPredicate() {
+ return expression.translate(leaves);
+ }
+
private static class BuilderImpl implements Builder {
private final Deque currentTree =
new ArrayDeque();
@@ -987,10 +1111,11 @@ private static Object boxLiteral(Object literal) {
private static PredicateLeaf.Type getType(Object literal) {
if (literal instanceof Byte ||
literal instanceof Short ||
- literal instanceof Integer ||
- literal instanceof Long) {
+ literal instanceof Integer) {
return PredicateLeaf.Type.INTEGER;
- } else if (literal instanceof HiveChar ||
+ } else if (literal instanceof Long) {
+ return PredicateLeaf.Type.LONG;
+ } else if (literal instanceof HiveChar ||
literal instanceof HiveVarchar ||
literal instanceof String) {
return PredicateLeaf.Type.STRING;
@@ -1005,7 +1130,7 @@ private static Object boxLiteral(Object literal) {
literal instanceof BigDecimal) {
return PredicateLeaf.Type.DECIMAL;
} else if (literal instanceof Boolean) {
- return PredicateLeaf.Type.BOOLEAN;
+ return PredicateLeaf.Type.BOOLEAN;
}
throw new IllegalArgumentException("Unknown type for literal " + literal);
}
@@ -1069,6 +1194,7 @@ public Builder in(String column, Object... literal) {
for(Object lit: literal){
argList.add(boxLiteral(lit));
}
+
PredicateLeaf leaf =
new PredicateLeafImpl(PredicateLeaf.Operator.IN,
getType(argList.get(0)), column, null, argList);
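One consequence of the leaf-by-leaf translation above: when no leaf can be converted (for example a lone DECIMAL comparison), translate() propagates the nulls and toFilterPredicate() returns null, which the parquet read path treats as "no pushdown". A hedged sketch, with illustrative names:

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

    public class UnsupportedLeafSketch {
      public static void main(String[] args) {
        // DECIMAL has no parquet leaf builder, so the single-leaf expression translates to null.
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
              .equals("z", HiveDecimal.create("1.0"))
            .end()
            .build();
        System.out.println(sarg.toFilterPredicate()); // null -> no row-group filtering applied
      }
    }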
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
index c91644c..323fd33 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
@@ -22,14 +22,13 @@
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionBuilder;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.junit.Test;
+import org.junit.*;
+import parquet.filter2.predicate.FilterPredicate;
import java.beans.XMLDecoder;
import java.io.ByteArrayInputStream;
@@ -39,6 +38,7 @@
import java.util.Set;
import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNull;
import static junit.framework.Assert.assertTrue;
/**
@@ -47,7 +47,7 @@
* to true and using a custom record reader that prints out the value of
* hive.io.filter.expr.serialized in createRecordReader. This should be
* replaced by generating the AST using the API and passing that in.
- *
+ *
* In each case, the corresponding part of the where clause is in the
* comment above the blob.
*/
@@ -76,12 +76,11 @@ private ExpressionTree constant(TruthValue val) {
/**
* Create a predicate leaf. This is used by another test.
*/
- public static
- PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator,
- PredicateLeaf.Type type,
- String columnName,
- Object literal,
- List<Object> literalList) {
+ public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator,
+ PredicateLeaf.Type type,
+ String columnName,
+ Object literal,
+ List<Object> literalList) {
return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName,
literal, literalList);
}
@@ -134,7 +133,7 @@ public void testFlatten() throws Exception {
).toString());
assertEquals("(and leaf-1 leaf-2 leaf-3 leaf-4)",
ExpressionBuilder.flatten(and(and(leaf(1), leaf(2)),
- and(leaf(3),leaf(4)))).toString());
+ and(leaf(3), leaf(4)))).toString());
assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4)",
ExpressionBuilder.flatten(or(leaf(1), or(leaf(2), or(leaf(3),
leaf(4))))).toString());
@@ -143,11 +142,11 @@ public void testFlatten() throws Exception {
leaf(4))).toString());
assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4 leaf-5 leaf-6)",
ExpressionBuilder.flatten(or(or(leaf(1), or(leaf(2), leaf(3))),
- or(or(leaf(4),leaf(5)), leaf(6)))).toString());
+ or(or(leaf(4), leaf(5)), leaf(6)))).toString());
assertEquals("(and (not leaf-1) leaf-2 (not leaf-3) leaf-4 (not leaf-5) leaf-6)",
ExpressionBuilder.flatten(and(and(not(leaf(1)), and(leaf(2),
not(leaf(3)))), and(and(leaf(4), not(leaf(5))), leaf(6)))
- ).toString());
+ ).toString());
assertEquals("(not (and leaf-1 leaf-2 leaf-3))",
ExpressionBuilder.flatten(not(and(leaf(1), and(leaf(2), leaf(3))))
).toString());
@@ -245,20 +244,20 @@ public void testCNF() throws Exception {
private static void assertNoSharedNodes(ExpressionTree tree,
Set<ExpressionTree> seen
- ) throws Exception {
+ ) throws Exception {
if (seen.contains(tree) &&
tree.getOperator() != ExpressionTree.Operator.LEAF) {
assertTrue("repeated node in expression " + tree, false);
}
seen.add(tree);
if (tree.getChildren() != null) {
- for(ExpressionTree child: tree.getChildren()) {
+ for (ExpressionTree child : tree.getChildren()) {
assertNoSharedNodes(child, seen);
}
}
}
- private ExprNodeGenericFuncDesc getFuncDesc (String xmlSerialized) {
+ private ExprNodeGenericFuncDesc getFuncDesc(String xmlSerialized) {
byte[] bytes;
try {
bytes = xmlSerialized.getBytes("UTF-8");
@@ -275,6 +274,7 @@ private ExprNodeGenericFuncDesc getFuncDesc (String xmlSerialized) {
decoder.close();
}
}
+
@Test
public void testExpression1() throws Exception {
// first_name = 'john' or
@@ -749,59 +749,85 @@ public void testExpression1() throws Exception {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(9, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String[] conditions = new String[]{
+ "eq(first_name, Binary{\"john\"})", /* first_name = 'john' */
+ "not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */
+ "lt(first_name, Binary{\"alan\"})", /* 'alan' > first_name */
+ "not(lteq(id, 12))", /* id > 12 or */
+ "not(lteq(id, 13))", /* 13 < id or */
+ "lt(id, 15)", /* id < 15 or */
+ "lt(id, 16)", /* 16 > id or */
+ "eq(id, 30)", /* id <=> 30 */
+ "eq(first_name, Binary{\"owen\"})" /* first_name <=> 'owen' */
+ };
+ String expected = String
+ .format("and(or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %8$s), " +
+ "or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %9$s))", conditions);
+ assertEquals(expected, p.toString());
+
PredicateLeaf leaf = leaves.get(0);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("john", leaf.getLiteral());
+ assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(1);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("greg", leaf.getLiteral());
+ assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(2);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("alan", leaf.getLiteral());
+ assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(3);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12L, leaf.getLiteral());
+ assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(4);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(13L, leaf.getLiteral());
+ assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(5);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(15L, leaf.getLiteral());
+ assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(6);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(16L, leaf.getLiteral());
+ assertEquals(16L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(7);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(30L, leaf.getLiteral());
+ assertEquals(30L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(30, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(8);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("owen", leaf.getLiteral());
+ assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" +
" (not leaf-4) leaf-5 leaf-6 leaf-7)" +
@@ -1017,30 +1043,46 @@ public void testExpression2() throws Exception {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(4, leaves.size());
+ String[] conditions = new String[]{
+ "eq(first_name, null)", /* first_name is null */
+ "not(eq(first_name, Binary{\"sue\"}))", /* first_name <> 'sue' */
+ "not(lt(id, 12))", /* id >= 12 */
+ "lteq(id, 4)" /* id <= 4 */
+ };
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions);
+ assertEquals(expected, p.toString());
+
PredicateLeaf leaf = leaves.get(0);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals(null, leaf.getLiteral());
- assertEquals(null, leaf.getLiteralList());
+ assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
+ assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC));
+ assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(1);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("sue", leaf.getLiteral());
+ assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(2);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12L, leaf.getLiteral());
+ assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(3);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(4L, leaf.getLiteral());
+ assertEquals(4L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(4, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)",
sarg.getExpression().toString());
@@ -1436,25 +1478,41 @@ public void testExpression3() throws Exception {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(3, leaves.size());
+ String[] conditions = new String[]{
+ "lt(id, 45)", /* id between 23 and 45 */
+ "not(lteq(id, 23))", /* id between 23 and 45 */
+ "eq(first_name, Binary{\"alan\"})", /* first_name = 'alan' */
+ "eq(last_name, Binary{\"smith\"})" /* 'smith' = last_name */
+ };
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions);
+ assertEquals(expected, p.toString());
+
PredicateLeaf leaf = leaves.get(0);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(null, leaf.getLiteral());
- assertEquals(23L, leaf.getLiteralList().get(0));
- assertEquals(45L, leaf.getLiteralList().get(1));
+ assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
+ assertEquals(23L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0));
+ assertEquals(23, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0));
+ assertEquals(45L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1));
+ assertEquals(45, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1));
leaf = leaves.get(1);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("alan", leaf.getLiteral());
+ assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(2);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("last_name", leaf.getColumnName());
- assertEquals("smith", leaf.getLiteral());
+ assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
assertEquals("(and leaf-0 leaf-1 leaf-2)",
sarg.getExpression().toString());
@@ -1646,25 +1704,41 @@ id in (34,50) */
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(3, leaves.size());
+ String[] conditions = new String[]{
+ "not(eq(id, 12))", /* id <> 12 */
+ "or(eq(first_name, Binary{\"john\"}), eq(first_name, Binary{\"sue\"}))", /* first_name in
+ ('john', 'sue') */
+ "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */
+ };
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions);
+ assertEquals(expected, p.toString());
+
PredicateLeaf leaf = leaves.get(0);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12L, leaf.getLiteral());
+ assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(1);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("john", leaf.getLiteralList().get(0));
- assertEquals("sue", leaf.getLiteralList().get(1));
+ assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0));
+ assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1));
+ assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0));
+ assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1));
leaf = leaves.get(2);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(34L, leaf.getLiteralList().get(0));
- assertEquals(50L, leaf.getLiteralList().get(1));
+ assertEquals(34L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0));
+ assertEquals(50L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1));
+ assertEquals(34, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0));
+ assertEquals(50, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1));
assertEquals("(and (not leaf-0) leaf-1 leaf-2)",
sarg.getExpression().toString());
@@ -1901,12 +1975,17 @@ public void testExpression5() throws Exception {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(1, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected =
+ "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))";
+ assertEquals(p.toString(), expected);
+
assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getType());
assertEquals(PredicateLeaf.Operator.BETWEEN,
leaves.get(0).getOperator());
assertEquals("first_name", leaves.get(0).getColumnName());
- assertEquals("david", leaves.get(0).getLiteralList().get(0));
- assertEquals("greg", leaves.get(0).getLiteralList().get(1));
+ assertEquals("david", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(0));
+ assertEquals("greg", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(1));
assertEquals("leaf-0",
sarg.getExpression().toString());
@@ -2378,59 +2457,90 @@ public void testExpression7() throws Exception {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(9, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" +
+ "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " +
+ "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " +
+ "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 16))), " +
+ "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 16))), " +
+ "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 16))), " +
+ "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 16))), " +
+ "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 16))), " +
+ "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 16))), " +
+ "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 16))), " +
+ "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 17))), " +
+ "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 17))), " +
+ "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 17))), " +
+ "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 17))), " +
+ "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 17))), " +
+ "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 17))), " +
+ "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 17))), " +
+ "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 17))), " +
+ "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 17)))";
+ assertEquals(p.toString(), expected);
+
PredicateLeaf leaf = leaves.get(0);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(18L, leaf.getLiteral());
+ assertEquals(18L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(18, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(1);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(10L, leaf.getLiteral());
+ assertEquals(10L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(10, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(2);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(13L, leaf.getLiteral());
+ assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(3);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(16L, leaf.getLiteral());
+ assertEquals(16L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(4);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(11L, leaf.getLiteral());
+ assertEquals(11L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(11, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(5);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12L, leaf.getLiteral());
+ assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(6);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(14L, leaf.getLiteral());
+ assertEquals(14L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(14, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(7);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(15L, leaf.getLiteral());
+ assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
leaf = leaves.get(8);
assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(17L, leaf.getLiteral());
+ assertEquals(17L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(17, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET));
assertEquals("(and" +
" (or leaf-0 leaf-1 leaf-2 leaf-3)" +
@@ -2512,6 +2622,9 @@ public void testExpression8() throws Exception {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(0, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ assertNull(p);
+
assertEquals("YES_NO_NULL",
sarg.getExpression().toString());
}
@@ -2648,115 +2761,115 @@ public void testExpression9() throws Exception {
public void testExpression10() throws Exception {
/* id >= 10 and not (10 > id) */
String exprStr = " \n" +
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " id \n"+
- " \n"+
- " \n"+
- " orc_people \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " int \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " 10 \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " boolean \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " id \n"+
- " \n"+
- " \n"+
- " orc_people \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " 10 \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " id \n" +
+ " \n" +
+ " \n" +
+ " orc_people \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " int \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " 10 \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " boolean \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " id \n" +
+ " \n" +
+ " \n" +
+ " orc_people \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " 10 \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
" ";
SearchArgumentImpl sarg =
@@ -2764,11 +2877,16 @@ public void testExpression10() throws Exception {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(1, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
+ assertEquals(expected, p.toString());
+
assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN,
leaves.get(0).getOperator());
assertEquals("id", leaves.get(0).getColumnName());
- assertEquals(10L, leaves.get(0).getLiteral());
+ assertEquals(10L, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.ORC));
+ assertEquals(10, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.PARQUET));
assertEquals("(and (not leaf-0) (not leaf-0))",
sarg.getExpression().toString());
@@ -2792,9 +2910,9 @@ public void testBuilder() throws Exception {
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("x", 10)
- .lessThanEquals("y", "hi")
- .equals("z", 1.0)
+ .lessThan("x", 10)
+ .lessThanEquals("y", "hi")
+ .equals("z", 1.0)
.end()
.build();
assertEquals("leaf-0 = (LESS_THAN x 10)\n" +
@@ -2803,12 +2921,12 @@ public void testBuilder() throws Exception {
"expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString());
sarg = SearchArgumentFactory.newBuilder()
.startNot()
- .startOr()
- .isNull("x")
- .between("y", 10, 20)
- .in("z", 1, 2, 3)
- .nullSafeEquals("a", "stinger")
- .end()
+ .startOr()
+ .isNull("x")
+ .between("y", 10, 20)
+ .in("z", 1, 2, 3)
+ .nullSafeEquals("a", "stinger")
+ .end()
.end()
.build();
assertEquals("leaf-0 = (IS_NULL x)\n" +
@@ -2816,6 +2934,12 @@ public void testBuilder() throws Exception {
"leaf-2 = (IN z 1 2 3)\n" +
"leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" +
"expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString());
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected =
+ "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " +
+ "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
+ assertEquals(expected, p.toString());
}
@Test
@@ -2823,24 +2947,25 @@ public void testBuilderComplexTypes() throws Exception {
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("x", new DateWritable(10))
- .lessThanEquals("y", new HiveChar("hi", 10))
- .equals("z", HiveDecimal.create("1.0"))
+ .lessThan("x", new DateWritable(10))
+ .lessThanEquals("y", new HiveChar("hi", 10))
+ .equals("z", HiveDecimal.create("1.0"))
.end()
.build();
assertEquals("leaf-0 = (LESS_THAN x 1970-01-11)\n" +
"leaf-1 = (LESS_THAN_EQUALS y hi)\n" +
"leaf-2 = (EQUALS z 1)\n" +
"expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString());
+ assertEquals("lteq(y, Binary{\"hi\"})", sarg.toFilterPredicate().toString());
sarg = SearchArgumentFactory.newBuilder()
.startNot()
- .startOr()
- .isNull("x")
- .between("y", HiveDecimal.create(10), 20.0)
- .in("z", (byte)1, (short)2, (int)3)
- .nullSafeEquals("a", new HiveVarchar("stinger", 100))
- .end()
+ .startOr()
+ .isNull("x")
+ .between("y", HiveDecimal.create(10), 20.0)
+ .in("z", (byte) 1, (short) 2, (int) 3)
+ .nullSafeEquals("a", new HiveVarchar("stinger", 100))
+ .end()
.end()
.build();
assertEquals("leaf-0 = (IS_NULL x)\n" +
@@ -2848,6 +2973,11 @@ public void testBuilderComplexTypes() throws Exception {
"leaf-2 = (IN z 1 2 3)\n" +
"leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" +
"expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString());
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
+ "not(eq(a, Binary{\"stinger\"})))";
+ assertEquals(expected, p.toString());
}
@Test
@@ -2864,13 +2994,14 @@ public void testBuilderComplexTypes2() throws Exception {
"leaf-1 = (LESS_THAN_EQUALS y hi)\n" +
"leaf-2 = (EQUALS z 1.0)\n" +
"expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString());
+ assertEquals("lteq(y, Binary{\"hi\"})", sarg.toFilterPredicate().toString());
sarg = SearchArgumentFactory.newBuilder()
.startNot()
.startOr()
.isNull("x")
.between("y", new BigDecimal(10), 20.0)
- .in("z", (byte)1, (short)2, (int)3)
+ .in("z", (byte) 1, (short) 2, (int) 3)
.nullSafeEquals("a", new HiveVarchar("stinger", 100))
.end()
.end()
@@ -2880,6 +3011,11 @@ public void testBuilderComplexTypes2() throws Exception {
"leaf-2 = (IN z 1 2 3)\n" +
"leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" +
"expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString());
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
+ "not(eq(a, Binary{\"stinger\"})))";
+ assertEquals(expected, p.toString());
}
@Test
@@ -2900,5 +3036,10 @@ public void testBuilderFloat() throws Exception {
"leaf-3 = (EQUALS z 0.22)\n" +
"leaf-4 = (EQUALS z1 0.22)\n" +
"expr = (and leaf-0 leaf-1 leaf-2 leaf-3 leaf-4)", sarg.toString());
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)), lteq(y, Binary{\"hi\"})), eq(z, " +
+ "0.22)), eq(z1, 0.22))";
+ assertEquals(expected, p.toString());
}
}
diff --git serde/pom.xml serde/pom.xml
index 98e5506..8c60b30 100644
--- serde/pom.xml
+++ serde/pom.xml
@@ -75,6 +75,11 @@
<artifactId>opencsv</artifactId>
<version>${opencsv.version}</version>
+ <dependency>
+ <groupId>com.twitter</groupId>
+ <artifactId>parquet-hadoop-bundle</artifactId>
+ <version>${parquet.version}</version>
+ </dependency>
diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
index 616c6db..41a14c2 100644
--- serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
+++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
@@ -43,7 +43,8 @@
* The possible types for sargs.
*/
public static enum Type {
- INTEGER, // all of the integer types
+ INTEGER, // all of the integer types except long
+ LONG,
FLOAT, // float and double
STRING, // string, char, varchar
DATE,
@@ -53,12 +54,20 @@
}
/**
+ * The file formats that support search argument pushdown.
+ */
+ public static enum FileFormat {
+ ORC,
+ PARQUET
+ }
+
+ /**
* Get the operator for the leaf.
*/
public Operator getOperator();
/**
- * Get the type of the column and literal.
+ * Get the type of the column and literal; literal values are adapted per file format by getLiteral.
*/
public Type getType();
@@ -69,14 +78,17 @@
public String getColumnName();
/**
- * Get the literal half of the predicate leaf.
- * @return a Long, Double, or String
+ * Get the literal half of the predicate leaf, adapting the literal type to what the given
+ * file format expects.
+ * @return a Long, Double, or String for ORC; an Integer, Long, Double, or String for Parquet
*/
- public Object getLiteral();
+ public Object getLiteral(FileFormat format);
/**
* For operators with multiple literals (IN and BETWEEN), get the literals.
- * @return the list of literals (Longs, Doubles, or Strings)
+ *
+ * @return the list of literals (Longs, Doubles, or Strings) for ORC, or the list of literals
+ * (Integers, Longs, Doubles, or Strings) for Parquet
*/
- public List<Object> getLiteralList();
+ public List<Object> getLiteralList(FileFormat format);
+
}
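To make the contract above concrete: for an int predicate the same leaf hands ORC a widened Long (to line up with ORC's long-based integer statistics) and parquet the original Integer. A small sketch with illustrative class and column names:

    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

    public class LiteralAdaptationSketch {
      public static void main(String[] args) {
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd().lessThan("id", 15).end().build();
        PredicateLeaf leaf = sarg.getLeaves().get(0);

        // The same leaf yields a Long for ORC and the original Integer for parquet.
        System.out.println(leaf.getLiteral(PredicateLeaf.FileFormat.ORC).getClass());     // class java.lang.Long
        System.out.println(leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET).getClass()); // class java.lang.Integer
      }
    }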
diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
index db0f014..9be54da 100644
--- serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
+++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.io.sarg;
+import parquet.filter2.predicate.FilterPredicate;
+
import java.util.List;
/**
@@ -175,6 +177,12 @@ public boolean isNeeded() {
public String toKryo();
/**
+ * Translate the search argument into the FilterPredicate used by parquet's filter2 API.
+ * @return a parquet FilterPredicate, or null if nothing can be pushed down
+ */
+ public FilterPredicate toFilterPredicate();
+
+ /**
* A builder object for contexts outside of Hive where it isn't easy to
* get a ExprNodeDesc. The user must call startOr, startAnd, or startNot
* before adding any leaves.