diff --git pom.xml pom.xml
index c694980..543ce71 100644
--- pom.xml
+++ pom.xml
@@ -145,7 +145,7 @@
requires netty < 3.6.0 we force hadoops version
-->
3.4.0.Final
- 1.5.0
+ 1.6.0rc3
0.12.0
2.5.0
1.0.1
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index bdc2806..324cbf7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -2393,7 +2393,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
// the stats object is converted to text and comparison is performed.
// When STRINGs are converted to other base types, NumberFormat exception
// can occur in which case TruthValue.YES_NO_NULL value is returned
- Object baseObj = predicate.getLiteral();
+ Object baseObj = predicate.getOrcLiteral();
Object minValue = getConvertedStatsObj(min, baseObj);
Object maxValue = getConvertedStatsObj(max, baseObj);
Object predObj = getBaseObjectForComparison(baseObj, minValue);
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java
new file mode 100644
index 0000000..6422189
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import parquet.filter2.predicate.FilterApi;
+import parquet.filter2.predicate.FilterPredicate;
+
+import java.util.List;
+
+/**
+ * Base class for builders that turn one Hive search-argument leaf into a Parquet
+ * {@link FilterPredicate} on a primitive-typed column.
+ */
+public abstract class FilterPredicateLeafBuilder {
+  /**
+   * Builds a filter predicate for an operator that carries several constants.
+   *
+   * <p>IN becomes a left-nested chain {@code or(or(eq(c, v0), eq(c, v1)), ...)}. BETWEEN
+   * becomes the inclusive range {@code and(ltEq(c, max), not(lt(c, min)))}, i.e.
+   * {@code min <= c <= max}.
+   *
+   * @param op IN or BETWEEN
+   * @param constants literals of the leaf; BETWEEN reads them as {@code [min, max]}
+   * @param columnName column the predicate is built on
+   * @return the predicate, or null (meaning no filter) for any other operator, for an IN
+   *         without constants, for a BETWEEN without exactly two constants, or for a BETWEEN
+   *         whose type-specific builder cannot express the bounds
+   * @throws Exception propagated from the single-constant buildPredict
+   */
+  public FilterPredicate buildPredict(PredicateLeaf.Operator op, List constants,
+                                      String columnName) throws Exception {
+    FilterPredicate result = null;
+    switch (op) {
+      case IN:
+        for (Object o : constants) {
+          FilterPredicate equalsOne =
+              buildPredict(PredicateLeaf.Operator.EQUALS, o, columnName);
+          result = (result == null) ? equalsOne : FilterApi.or(result, equalsOne);
+        }
+        return result;
+      case BETWEEN:
+        if (constants.size() != 2) {
+          return result;
+        }
+        Object min = constants.get(0);
+        Object max = constants.get(1);
+        // SQL BETWEEN includes both end points. The lower bound "c >= min" is spelled
+        // not(c < min) and the upper bound is c <= max; the earlier not(c <= min) / c < max
+        // pairing excluded rows equal to either bound.
+        // NOTE(review): expected strings in TestSearchArgumentImpl written against the
+        // exclusive form (e.g. "and(lt(id, 45), not(lteq(id, 23)))") need regenerating.
+        FilterPredicate belowMin =
+            buildPredict(PredicateLeaf.Operator.LESS_THAN, min, columnName);
+        FilterPredicate atMostMax =
+            buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS, max, columnName);
+        if (belowMin == null || atMostMax == null) {
+          // the concrete builder has no ordering comparison for this type: push nothing
+          return null;
+        }
+        return FilterApi.and(atMostMax, FilterApi.not(belowMin));
+      default:
+        return result;
+    }
+  }
+
+  /**
+   * Builds a predicate with a single constant.
+   *
+   * @param op EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL
+   * @param constant literal to compare the column with; may be null
+   * @param columnName column the predicate is built on
+   * @return null or a FilterPredicate, null means no filter will be executed
+   * @throws Exception when the constant cannot be converted for the column type
+   */
+  public abstract FilterPredicate buildPredict(PredicateLeaf.Operator op, Object constant,
+                                               String columnName) throws Exception;
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
new file mode 100644
index 0000000..5b04668
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import parquet.filter2.predicate.FilterApi;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
+import parquet.filter2.predicate.FilterPredicate;
+import parquet.io.api.Binary;
+
+public class LeafFilterFactory {
+
+  /** Builds Parquet predicates on int columns; the constant is read as a Number. */
+  class IntFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+    /**
+     * Handles EQUALS, NULL_SAFE_EQUALS, IS_NULL, LESS_THAN and LESS_THAN_EQUALS.
+     *
+     * @param op operator of the leaf
+     * @param constant literal of the leaf; may be null only for the equality operators
+     * @param columnName column the predicate is built on
+     * @return the predicate, or null for any other operator
+     */
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant,
+                                        String columnName) {
+      switch (op) {
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          // a null constant is kept as null so that eq(col, null) is produced
+          return FilterApi.eq(FilterApi.intColumn(columnName),
+              (constant != null) ? ((Number) constant).intValue() : null);
+        case LESS_THAN:
+          return FilterApi.lt(FilterApi.intColumn(columnName),
+              ((Number) constant).intValue());
+        case LESS_THAN_EQUALS:
+          return FilterApi.ltEq(FilterApi.intColumn(columnName),
+              ((Number) constant).intValue());
+        default:
+          return null;
+      }
+    }
+  }
+
+  /** Builds Parquet predicates on long columns; the constant is read as a Number. */
+  class LongFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant,
+                                        String columnName) {
+      switch (op) {
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          return FilterApi.eq(FilterApi.longColumn(columnName),
+              (constant != null) ? ((Number) constant).longValue() : null);
+        case LESS_THAN:
+          return FilterApi.lt(FilterApi.longColumn(columnName),
+              ((Number) constant).longValue());
+        case LESS_THAN_EQUALS:
+          return FilterApi.ltEq(FilterApi.longColumn(columnName),
+              ((Number) constant).longValue());
+        default:
+          return null;
+      }
+    }
+  }
+
+  /** Builds Parquet predicates on double columns; the constant is read as a Number. */
+  class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant,
+                                        String columnName) {
+      switch (op) {
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          return FilterApi.eq(FilterApi.doubleColumn(columnName),
+              (constant != null) ? ((Number) constant).doubleValue() : null);
+        case LESS_THAN:
+          return FilterApi.lt(FilterApi.doubleColumn(columnName),
+              ((Number) constant).doubleValue());
+        case LESS_THAN_EQUALS:
+          return FilterApi.ltEq(FilterApi.doubleColumn(columnName),
+              ((Number) constant).doubleValue());
+        default:
+          return null;
+      }
+    }
+  }
+
+  /** Builds Parquet predicates on boolean columns; only the equality operators are handled. */
+  class BooleanFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant,
+                                        String columnName) throws Exception{
+      switch (op) {
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          return FilterApi.eq(FilterApi.booleanColumn(columnName),
+              (constant != null) ? ((Boolean) constant).booleanValue() : null);
+        default:
+          return null;
+      }
+    }
+  }
+
+  /** Builds Parquet predicates on binary columns; the constant is read as a String. */
+  class BinaryFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+    @Override
+    public FilterPredicate buildPredict(Operator op, Object constant,
+                                        String columnName) throws Exception{
+      switch (op) {
+        case IS_NULL:
+        case EQUALS:
+        case NULL_SAFE_EQUALS:
+          return FilterApi.eq(FilterApi.binaryColumn(columnName),
+              (constant != null) ? Binary.fromString((String) constant) : null);
+        case LESS_THAN:
+          return FilterApi.lt(FilterApi.binaryColumn(columnName),
+              Binary.fromString((String) constant));
+        case LESS_THAN_EQUALS:
+          return FilterApi.ltEq(FilterApi.binaryColumn(columnName),
+              Binary.fromString((String) constant));
+        default:
+          // should never be executed
+          return null;
+      }
+    }
+  }
+
+  /**
+   * Picks the leaf filter builder for a predicate leaf type. DATE, DECIMAL and TIMESTAMP are
+   * not supported yet and yield null, as does any other unlisted type.
+   *
+   * @param type FilterPredicateType
+   * @return a new builder for the type, or null when the type is not supported
+   */
+  public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type){
+    final FilterPredicateLeafBuilder builder;
+    switch (type) {
+      case BOOLEAN:
+        builder = new BooleanFilterPredicateLeafBuilder();
+        break;
+      case INTEGER:
+        builder = new IntFilterPredicateLeafBuilder();
+        break;
+      case LONG:
+        builder = new LongFilterPredicateLeafBuilder();
+        break;
+      case FLOAT:   // float and double
+        builder = new DoubleFilterPredicateLeafBuilder();
+        break;
+      case STRING:  // string, char, varchar
+        builder = new BinaryFilterPredicateLeafBuilder();
+        break;
+      default:      // DATE, DECIMAL, TIMESTAMP and everything else
+        builder = null;
+        break;
+    }
+    return builder;
+  }
+
+  public enum FilterOp {
+    LessThan,
+    Equals
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
index f5da46d..4694b69 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
@@ -20,8 +20,14 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileSplit;
@@ -32,6 +38,7 @@
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
+import parquet.filter2.predicate.FilterPredicate;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.ParquetInputFormat;
import parquet.hadoop.ParquetInputSplit;
@@ -83,6 +90,8 @@ public ParquetRecordReaderWrapper(
taskAttemptID = new TaskAttemptID();
}
+ this.setFilter(oldJobConf);
+
// create a TaskInputOutputContext
final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(oldJobConf, taskAttemptID);
@@ -110,6 +119,22 @@ public ParquetRecordReaderWrapper(
}
}
+ /**
+  * Pushes the Hive filter expression stored under TableScanDesc.FILTER_EXPR_CONF_STR down
+  * to Parquet as a FilterPredicate. Nothing is set when the job carries no serialized
+  * filter, or when the filter cannot be expressed as a Parquet predicate.
+  *
+  * @param conf job configuration that is read for the filter and receives the predicate
+  */
+ public void setFilter(final JobConf conf){
+   final String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+   if (serializedPushdown == null) {
+     return;
+   }
+   final SearchArgument sarg =
+       SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown));
+   final FilterPredicate p = sarg.toFilterPredicate();
+   // toFilterPredicate() returns null when no part of the expression can be translated
+   // (constants, unsupported column types); ParquetInputFormat.setFilterPredicate
+   // dereferences its argument, so a null must never reach it.
+   if (p != null) {
+     ParquetInputFormat.setFilterPredicate(conf, p);
+   }
+ }
+
@Override
public void close() throws IOException {
if (realReader != null) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
index eeb9641..130adb8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
@@ -30,9 +30,13 @@
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
+import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -59,11 +63,14 @@
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
+import parquet.filter2.predicate.FilterApi;
+import parquet.filter2.predicate.FilterPredicate;
/**
* The implementation of SearchArguments.
*/
final class SearchArgumentImpl implements SearchArgument {
+ public static final Log LOG = LogFactory.getLog(SearchArgumentImpl.class);
static final class PredicateLeafImpl implements PredicateLeaf {
private final Operator operator;
@@ -98,7 +105,14 @@ public Operator getOperator() {
}
+ /**
+ * Returns the type of this leaf exactly as it was stored at construction, with LONG and
+ * INTEGER kept distinct.
+ */
@Override
- public Type getType() {
+ public Type getParquetType() {
+ return type;
+ }
+
+ /**
+ * Returns the type of this leaf with LONG reported as INTEGER; every other type is
+ * returned unchanged.
+ */
+ public Type getOrcType() {
+ if(type == Type.LONG){
+ return Type.INTEGER;
+ }
return type;
}
@@ -108,7 +122,22 @@ public String getColumnName() {
}
+ /**
+ * Returns the literal adapted to the base types ORC needs: a java.util.Date comes back as
+ * a Timestamp and an Integer comes back as a Long; anything else is returned as stored.
+ */
@Override
- public Object getLiteral() {
+ public Object getOrcLiteral() {
+   final Object value = literal;
+   if (value instanceof java.util.Date) {
+     // To get around a kryo 2.22 bug while deserialize a Timestamp into Date
+     // (https://github.com/EsotericSoftware/kryo/issues/88)
+     // When we see a Date, convert back into Timestamp
+     final long millis = ((java.util.Date) value).getTime();
+     return new Timestamp(millis);
+   } else if (value instanceof Integer) {
+     // adapt base type to what orc needs
+     return Long.valueOf(value.toString());
+   } else {
+     return value;
+   }
+ }
+
+ @Override
+ public Object getParquetLiteral() {
// To get around a kryo 2.22 bug while deserialize a Timestamp into Date
// (https://github.com/EsotericSoftware/kryo/issues/88)
// When we see a Date, convert back into Timestamp
@@ -120,6 +149,20 @@ public Object getLiteral() {
+ /**
+ * Returns the literal list with every Integer element widened to Long, matching what
+ * getOrcLiteral() does for a single literal. Elements of any other type, including null,
+ * are passed through untouched. When no element needs widening the stored list itself is
+ * returned.
+ */
@Override
public List getLiteralList() {
+   if (literalList == null || literalList.isEmpty()) {
+     return literalList;
+   }
+   // Decide per element: looking only at the first element and then casting all of them
+   // throws ClassCastException on mixed lists such as [1, 2L] and misses [2L, 1].
+   boolean widened = false;
+   List result = new ArrayList(literalList.size());
+   for (Object o : literalList) {
+     if (o instanceof Integer) {
+       result.add(Long.valueOf(((Integer) o).longValue()));
+       widened = true;
+     } else {
+       result.add(o);
+     }
+   }
+   return widened ? result : literalList;
+ }
+
+ /**
+ * Returns the literal list exactly as stored, without the Integer-to-Long conversion that
+ * getLiteralList() performs.
+ */
+ @Override
+ public List getParquetLiteralList() {
return literalList;
}
@@ -254,6 +297,76 @@ TruthValue evaluate(TruthValue[] leaves) {
}
}
+ /**
+  * Translates this expression (sub)tree into a Parquet FilterPredicate.
+  *
+  * A null result means "no filter": the subtree cannot be expressed and every row has to be
+  * kept. An untranslatable child of an AND is dropped, which only widens the filter. An
+  * untranslatable child of an OR makes the whole OR untranslatable, because that child
+  * could accept rows the remaining children reject.
+  *
+  * NOTE(review): dropping AND children is only sound while NOT wraps leaves, as in the
+  * expressions exercised by TestSearchArgumentImpl (e.g. "(not leaf-1)") - confirm.
+  * NOTE(review): expected strings in TestSearchArgumentImpl that spell a disjunction as
+  * and(...) were generated by the previous behaviour and need regenerating.
+  *
+  * @param leafs predicate leaves; a LEAF node looks its leaf up by index in this list
+  * @return the predicate, or null when nothing can be pushed down
+  */
+ FilterPredicate translate(List leafs){
+   FilterPredicate p = null;
+   switch (operator) {
+     case OR:
+       for (ExpressionTree child : children) {
+         FilterPredicate part = child.translate(leafs);
+         if (part == null) {
+           // one branch accepts rows we cannot describe: give up on the whole OR
+           return null;
+         }
+         p = (p == null) ? part : FilterApi.or(p, part);
+       }
+       return p;
+     case AND:
+       for (ExpressionTree child : children) {
+         FilterPredicate part = child.translate(leafs);
+         // constant means no filter, ignore it when it is null
+         if (part != null) {
+           p = (p == null) ? part : FilterApi.and(p, part);
+         }
+       }
+       return p;
+     case NOT: {
+       FilterPredicate inner = children.get(0).translate(leafs);
+       return (inner == null) ? null : FilterApi.not(inner);
+     }
+     case LEAF:
+       return buildFilterPredicateFromPredicateLeaf((PredicateLeaf) leafs.get(leaf));
+     case CONSTANT:
+       return null;// no filter will be executed for constant
+     default:
+       throw new IllegalStateException("Unknown operator: " + operator);
+   }
+ }
+
+ /**
+  * Builds the Parquet predicate for one leaf, using the builder that LeafFilterFactory
+  * returns for the leaf's Parquet type. IN and BETWEEN are built from the literal list,
+  * every other operator from the single literal.
+  *
+  * @param leaf the predicate leaf to translate
+  * @return the predicate, or null when the type has no builder or building failed (the
+  *         failure is logged)
+  */
+ private FilterPredicate buildFilterPredicateFromPredicateLeaf(PredicateLeaf leaf) {
+   final LeafFilterFactory factory = new LeafFilterFactory();
+   try {
+     final boolean multiLiteral = isMultiLiteralsOperator(leaf.getOperator());
+     final FilterPredicateLeafBuilder leafBuilder =
+         factory.getLeafFilterBuilderByType(leaf.getParquetType());
+     if (leafBuilder == null) {
+       return null;
+     }
+     if (multiLiteral) {
+       return leafBuilder.buildPredict(leaf.getOperator(), leaf.getParquetLiteralList(),
+           leaf.getColumnName());
+     }
+     return leafBuilder.buildPredict(leaf.getOperator(), leaf.getParquetLiteral(),
+         leaf.getColumnName());
+   } catch (Exception e) {
+     // best effort: a leaf that cannot be built simply contributes no filter
+     LOG.error("fail to build predicate filter leaf with errors", e);
+     return null;
+   }
+ }
+
+ /** Tells whether the operator carries a list of literals, which is the case for IN and BETWEEN. */
+ private boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) {
+   if (op == PredicateLeaf.Operator.IN) {
+     return true;
+   }
+   return op == PredicateLeaf.Operator.BETWEEN;
+ }
+
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
@@ -314,8 +427,9 @@ Operator getOperator() {
case BYTE:
case SHORT:
case INT:
- case LONG:
return PredicateLeaf.Type.INTEGER;
+ case LONG:
+ return PredicateLeaf.Type.LONG;
case CHAR:
case VARCHAR:
case STRING:
@@ -360,6 +474,8 @@ private static String getColumnName(ExprNodeGenericFuncDesc expr,
private static Object boxLiteral(ExprNodeConstantDesc lit) {
switch (getType(lit)) {
case INTEGER:
+ return ((Number) lit.getValue()).intValue();
+ case LONG:
return ((Number) lit.getValue()).longValue();
case STRING:
return StringUtils.stripEnd(lit.getValue().toString(), null);
@@ -420,6 +536,7 @@ private ExpressionTree createLeaf(PredicateLeaf.Operator operator,
if (type == null) {
return new ExpressionTree(TruthValue.YES_NO_NULL);
}
+
Object literal = null;
List literalList = null;
switch (operator) {
@@ -903,6 +1020,11 @@ static SearchArgument fromKryo(String value) {
return new Kryo().readObject(input, SearchArgumentImpl.class);
}
+ /**
+  * Translates the expression tree of this search argument, together with its leaves, into
+  * a Parquet FilterPredicate.
+  *
+  * @return whatever the root expression's translate(leaves) returns, which may be null
+  */
+ @Override
+ public FilterPredicate toFilterPredicate() {
+   final FilterPredicate translated = this.expression.translate(this.leaves);
+   return translated;
+ }
+
private static class BuilderImpl implements Builder {
private final Deque currentTree =
new ArrayDeque();
@@ -973,7 +1095,7 @@ private static Object boxLiteral(Object literal) {
} else if (literal instanceof Byte ||
literal instanceof Short ||
literal instanceof Integer) {
- return Long.valueOf(literal.toString());
+ return Integer.valueOf(literal.toString());
} else if (literal instanceof Float) {
// to avoid change in precision when upcasting float to double
// we convert the literal to string and parse it as double. (HIVE-8460)
@@ -987,10 +1109,11 @@ private static Object boxLiteral(Object literal) {
private static PredicateLeaf.Type getType(Object literal) {
if (literal instanceof Byte ||
literal instanceof Short ||
- literal instanceof Integer ||
- literal instanceof Long) {
+ literal instanceof Integer) {
return PredicateLeaf.Type.INTEGER;
- } else if (literal instanceof HiveChar ||
+ } else if(literal instanceof Long){
+ return PredicateLeaf.Type.LONG;
+ }else if (literal instanceof HiveChar ||
literal instanceof HiveVarchar ||
literal instanceof String) {
return PredicateLeaf.Type.STRING;
@@ -1005,7 +1128,7 @@ private static Object boxLiteral(Object literal) {
literal instanceof BigDecimal) {
return PredicateLeaf.Type.DECIMAL;
} else if (literal instanceof Boolean) {
- return PredicateLeaf.Type.BOOLEAN;
+ return PredicateLeaf.Type.BOOLEAN;
}
throw new IllegalArgumentException("Unknown type for literal " + literal);
}
@@ -1069,6 +1192,7 @@ public Builder in(String column, Object... literal) {
for(Object lit: literal){
argList.add(boxLiteral(lit));
}
+
PredicateLeaf leaf =
new PredicateLeafImpl(PredicateLeaf.Operator.IN,
getType(argList.get(0)), column, null, argList);
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
index 831ef8c..f6e9011 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
@@ -22,14 +22,13 @@
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionBuilder;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.junit.Test;
+import parquet.filter2.predicate.FilterPredicate;
import java.beans.XMLDecoder;
import java.io.ByteArrayInputStream;
@@ -39,6 +38,7 @@
import java.util.Set;
import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNull;
import static junit.framework.Assert.assertTrue;
/**
@@ -47,7 +47,7 @@
* to true and using a custom record reader that prints out the value of
* hive.io.filter.expr.serialized in createRecordReader. This should be
* replaced by generating the AST using the API and passing that in.
- *
+ *
* In each case, the corresponding part of the where clause is in the
* comment above the blob.
*/
@@ -76,12 +76,11 @@ private ExpressionTree constant(TruthValue val) {
/**
* Create a predicate leaf. This is used by another test.
+ * All five arguments are handed unchanged to the SearchArgumentImpl.PredicateLeafImpl
+ * constructor.
+ *
+ * @param operator operator of the leaf
+ * @param type type of the leaf
+ * @param columnName name of the column the leaf refers to
+ * @param literal single literal of the leaf
+ * @param literalList literal list of the leaf
+ * @return the newly created PredicateLeafImpl
*/
- public static
- PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator,
- PredicateLeaf.Type type,
- String columnName,
- Object literal,
- List literalList) {
+ public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator,
+ PredicateLeaf.Type type,
+ String columnName,
+ Object literal,
+ List literalList) {
return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName,
literal, literalList);
}
@@ -134,7 +133,7 @@ public void testFlatten() throws Exception {
).toString());
assertEquals("(and leaf-1 leaf-2 leaf-3 leaf-4)",
ExpressionBuilder.flatten(and(and(leaf(1), leaf(2)),
- and(leaf(3),leaf(4)))).toString());
+ and(leaf(3), leaf(4)))).toString());
assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4)",
ExpressionBuilder.flatten(or(leaf(1), or(leaf(2), or(leaf(3),
leaf(4))))).toString());
@@ -143,11 +142,11 @@ public void testFlatten() throws Exception {
leaf(4))).toString());
assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4 leaf-5 leaf-6)",
ExpressionBuilder.flatten(or(or(leaf(1), or(leaf(2), leaf(3))),
- or(or(leaf(4),leaf(5)), leaf(6)))).toString());
+ or(or(leaf(4), leaf(5)), leaf(6)))).toString());
assertEquals("(and (not leaf-1) leaf-2 (not leaf-3) leaf-4 (not leaf-5) leaf-6)",
ExpressionBuilder.flatten(and(and(not(leaf(1)), and(leaf(2),
not(leaf(3)))), and(and(leaf(4), not(leaf(5))), leaf(6)))
- ).toString());
+ ).toString());
assertEquals("(not (and leaf-1 leaf-2 leaf-3))",
ExpressionBuilder.flatten(not(and(leaf(1), and(leaf(2), leaf(3))))
).toString());
@@ -245,20 +244,20 @@ public void testCNF() throws Exception {
+ /**
+ * Walks the tree recursively and fails when a node that is not a LEAF is already contained
+ * in seen. Every visited node, leaves included, is added to seen.
+ *
+ * @param tree root of the (sub)tree to check
+ * @param seen nodes visited so far; updated by this call
+ */
private static void assertNoSharedNodes(ExpressionTree tree,
Set seen
- ) throws Exception {
+ ) throws Exception {
if (seen.contains(tree) &&
tree.getOperator() != ExpressionTree.Operator.LEAF) {
assertTrue("repeated node in expression " + tree, false);
}
seen.add(tree);
if (tree.getChildren() != null) {
- for(ExpressionTree child: tree.getChildren()) {
+ for (ExpressionTree child : tree.getChildren()) {
assertNoSharedNodes(child, seen);
}
}
}
- private ExprNodeGenericFuncDesc getFuncDesc (String xmlSerialized) {
+ private ExprNodeGenericFuncDesc getFuncDesc(String xmlSerialized) {
byte[] bytes;
try {
bytes = xmlSerialized.getBytes("UTF-8");
@@ -275,6 +274,7 @@ private ExprNodeGenericFuncDesc getFuncDesc (String xmlSerialized) {
decoder.close();
}
}
+
@Test
public void testExpression1() throws Exception {
// first_name = 'john' or
@@ -749,59 +749,68 @@ public void testExpression1() throws Exception {
List leaves = sarg.getLeaves();
assertEquals(9, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(and(and(and(and(and(and(eq(first_name, Binary{\"john\"}), " +
+ "not(lteq(first_name, Binary{\"greg\"}))), lt(first_name, Binary{\"alan\"})), " +
+ "not(lteq(id, 12))), not(lteq(id, 13))), lt(id, 15)), lt(id, 16)), eq(id, 30)), " +
+ "and(and(and(and(and(and(and(eq(first_name, Binary{\"john\"}), not(lteq(first_name, " +
+ "Binary{\"greg\"}))), lt(first_name, Binary{\"alan\"})), not(lteq(id, 12))), " +
+ "not(lteq(id, 13))), lt(id, 15)), lt(id, 16)), eq(first_name, Binary{\"owen\"})))";
+ assertEquals(expected, p.toString());
+
PredicateLeaf leaf = leaves.get(0);
- assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
+ assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("john", leaf.getLiteral());
+ assertEquals("john", leaf.getOrcLiteral());
leaf = leaves.get(1);
- assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
+ assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("greg", leaf.getLiteral());
+ assertEquals("greg", leaf.getOrcLiteral());
leaf = leaves.get(2);
- assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
+ assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("alan", leaf.getLiteral());
+ assertEquals("alan", leaf.getOrcLiteral());
leaf = leaves.get(3);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12L, leaf.getLiteral());
+ assertEquals(12L, leaf.getOrcLiteral());
leaf = leaves.get(4);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(13L, leaf.getLiteral());
+ assertEquals(13L, leaf.getOrcLiteral());
leaf = leaves.get(5);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(15L, leaf.getLiteral());
+ assertEquals(15L, leaf.getOrcLiteral());
leaf = leaves.get(6);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(16L, leaf.getLiteral());
+ assertEquals(16L, leaf.getOrcLiteral());
leaf = leaves.get(7);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(30L, leaf.getLiteral());
+ assertEquals(30L, leaf.getOrcLiteral());
leaf = leaves.get(8);
- assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
+ assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("owen", leaf.getLiteral());
+ assertEquals("owen", leaf.getOrcLiteral());
assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" +
" (not leaf-4) leaf-5 leaf-6 leaf-7)" +
@@ -1017,30 +1026,35 @@ public void testExpression2() throws Exception {
List leaves = sarg.getLeaves();
assertEquals(4, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(and(eq(first_name, null), not(eq(first_name, Binary{\"sue\"}))), " +
+ "not(lt(id, 12))), lteq(id, 4))";
+ assertEquals(p.toString(), expected);
+
PredicateLeaf leaf = leaves.get(0);
- assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
+ assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals(null, leaf.getLiteral());
+ assertEquals(null, leaf.getOrcLiteral());
assertEquals(null, leaf.getLiteralList());
leaf = leaves.get(1);
- assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
+ assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("sue", leaf.getLiteral());
+ assertEquals("sue", leaf.getOrcLiteral());
leaf = leaves.get(2);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12L, leaf.getLiteral());
+ assertEquals(12L, leaf.getOrcLiteral());
leaf = leaves.get(3);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(4L, leaf.getLiteral());
+ assertEquals(4L, leaf.getOrcLiteral());
assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)",
sarg.getExpression().toString());
@@ -1436,25 +1450,30 @@ public void testExpression3() throws Exception {
List leaves = sarg.getLeaves();
assertEquals(3, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(and(lt(id, 45), not(lteq(id, 23))), eq(first_name, " +
+ "Binary{\"alan\"})), eq(last_name, Binary{\"smith\"}))";
+ assertEquals(p.toString(), expected);
+
PredicateLeaf leaf = leaves.get(0);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(null, leaf.getLiteral());
+ assertEquals(null, leaf.getOrcLiteral());
assertEquals(23L, leaf.getLiteralList().get(0));
assertEquals(45L, leaf.getLiteralList().get(1));
leaf = leaves.get(1);
- assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
+ assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
- assertEquals("alan", leaf.getLiteral());
+ assertEquals("alan", leaf.getOrcLiteral());
leaf = leaves.get(2);
- assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
+ assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("last_name", leaf.getColumnName());
- assertEquals("smith", leaf.getLiteral());
+ assertEquals("smith", leaf.getOrcLiteral());
assertEquals("(and leaf-0 leaf-1 leaf-2)",
sarg.getExpression().toString());
@@ -1646,21 +1665,26 @@ id in (34,50) */
List leaves = sarg.getLeaves();
assertEquals(3, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(not(eq(id, 12)), or(eq(first_name, Binary{\"john\"}), " +
+ "eq(first_name, Binary{\"sue\"}))), or(eq(id, 34), eq(id, 50)))";
+ assertEquals(p.toString(), expected);
+
PredicateLeaf leaf = leaves.get(0);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12L, leaf.getLiteral());
+ assertEquals(12L, leaf.getOrcLiteral());
leaf = leaves.get(1);
- assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
+ assertEquals(PredicateLeaf.Type.STRING, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
assertEquals("john", leaf.getLiteralList().get(0));
assertEquals("sue", leaf.getLiteralList().get(1));
leaf = leaves.get(2);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
assertEquals(34L, leaf.getLiteralList().get(0));
@@ -1901,7 +1925,12 @@ public void testExpression5() throws Exception {
List leaves = sarg.getLeaves();
assertEquals(1, leaves.size());
- assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getType());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, " +
+ "Binary{\"david\"})))";
+ assertEquals(expected, p.toString());
+
+ assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getOrcType());
assertEquals(PredicateLeaf.Operator.BETWEEN,
leaves.get(0).getOperator());
assertEquals("first_name", leaves.get(0).getColumnName());
@@ -2378,59 +2407,81 @@ public void testExpression7() throws Exception {
List leaves = sarg.getLeaves();
assertEquals(9, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and" +
+ "(and(and(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), and(and(and(lt(id, 18), " +
+ "lt(id, 11)), lt(id, 13)), lt(id, 16))), " +
+ "and(and(and(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 16))), and(and(and(lt(id, 18), " +
+ "lt(id, 10)), lt(id, 14)), lt(id, 16))), and(and(and(lt(id, 18), " +
+ "lt(id, 11)), lt(id, 14)), lt(id, 16))), and(and(and(lt(id, 18), " +
+ "lt(id, 12)), lt(id, 14)), lt(id, 16))), and(and(and(lt(id, 18), " +
+ "lt(id, 10)), lt(id, 15)), lt(id, 16))), and(and(and(lt(id, 18), " +
+ "lt(id, 11)), lt(id, 15)), lt(id, 16))), and(and(and(lt(id, 18), " +
+ "lt(id, 12)), lt(id, 15)), lt(id, 16))), and(and(and(lt(id, 18), " +
+ "lt(id, 10)), lt(id, 13)), lt(id, 17))), and(and(and(lt(id, 18), " +
+ "lt(id, 11)), lt(id, 13)), lt(id, 17))), and(and(and(lt(id, 18), " +
+ "lt(id, 12)), lt(id, 13)), lt(id, 17))), and(and(and(lt(id, 18), " +
+ "lt(id, 10)), lt(id, 14)), lt(id, 17))), and(and(and(lt(id, 18), " +
+ "lt(id, 11)), lt(id, 14)), lt(id, 17))), and(and(and(lt(id, 18), " +
+ "lt(id, 12)), lt(id, 14)), lt(id, 17))), and(and(and(lt(id, 18), " +
+ "lt(id, 10)), lt(id, 15)), lt(id, 17))), and(and(and(lt(id, 18), " +
+ "lt(id, 11)), lt(id, 15)), lt(id, 17))), and(and(and(lt(id, 18), " +
+ "lt(id, 12)), lt(id, 15)), lt(id, 17)))";
+ assertEquals(expected, p.toString());
+
PredicateLeaf leaf = leaves.get(0);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(18L, leaf.getLiteral());
+ assertEquals(18L, leaf.getOrcLiteral());
leaf = leaves.get(1);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(10L, leaf.getLiteral());
+ assertEquals(10L, leaf.getOrcLiteral());
leaf = leaves.get(2);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(13L, leaf.getLiteral());
+ assertEquals(13L, leaf.getOrcLiteral());
leaf = leaves.get(3);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(16L, leaf.getLiteral());
+ assertEquals(16L, leaf.getOrcLiteral());
leaf = leaves.get(4);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(11L, leaf.getLiteral());
+ assertEquals(11L, leaf.getOrcLiteral());
leaf = leaves.get(5);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(12L, leaf.getLiteral());
+ assertEquals(12L, leaf.getOrcLiteral());
leaf = leaves.get(6);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(14L, leaf.getLiteral());
+ assertEquals(14L, leaf.getOrcLiteral());
leaf = leaves.get(7);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(15L, leaf.getLiteral());
+ assertEquals(15L, leaf.getOrcLiteral());
leaf = leaves.get(8);
- assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
+ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
- assertEquals(17L, leaf.getLiteral());
+ assertEquals(17L, leaf.getOrcLiteral());
assertEquals("(and" +
" (or leaf-0 leaf-1 leaf-2 leaf-3)" +
@@ -2512,6 +2563,9 @@ public void testExpression8() throws Exception {
List leaves = sarg.getLeaves();
assertEquals(0, leaves.size());
+ FilterPredicate p = sarg.toFilterPredicate();
+ assertNull(p);
+
assertEquals("YES_NO_NULL",
sarg.getExpression().toString());
}
@@ -2648,115 +2702,115 @@ public void testExpression9() throws Exception {
public void testExpression10() throws Exception {
/* id >= 10 and not (10 > id) */
String exprStr = " \n" +
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " id \n"+
- " \n"+
- " \n"+
- " orc_people \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " int \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " 10 \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " boolean \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " id \n"+
- " \n"+
- " \n"+
- " orc_people \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " 10 \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
- " \n"+
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " id \n" +
+ " \n" +
+ " \n" +
+ " orc_people \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " int \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " 10 \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " boolean \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " id \n" +
+ " \n" +
+ " \n" +
+ " orc_people \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " 10 \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
" ";
SearchArgumentImpl sarg =
@@ -2764,11 +2818,15 @@ public void testExpression10() throws Exception {
List leaves = sarg.getLeaves();
assertEquals(1, leaves.size());
- assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType());
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
+ assertEquals(expected, p.toString());
+
+ assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getOrcType());
assertEquals(PredicateLeaf.Operator.LESS_THAN,
leaves.get(0).getOperator());
assertEquals("id", leaves.get(0).getColumnName());
- assertEquals(10L, leaves.get(0).getLiteral());
+ assertEquals(10L, leaves.get(0).getOrcLiteral());
assertEquals("(and (not leaf-0) (not leaf-0))",
sarg.getExpression().toString());
@@ -2792,9 +2850,9 @@ public void testBuilder() throws Exception {
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("x", 10)
- .lessThanEquals("y", "hi")
- .equals("z", 1.0)
+ .lessThan("x", 10)
+ .lessThanEquals("y", "hi")
+ .equals("z", 1.0)
.end()
.build();
assertEquals("leaf-0 = (LESS_THAN x 10)\n" +
@@ -2803,12 +2861,12 @@ public void testBuilder() throws Exception {
"expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString());
sarg = SearchArgumentFactory.newBuilder()
.startNot()
- .startOr()
- .isNull("x")
- .between("y", 10, 20)
- .in("z", 1, 2, 3)
- .nullSafeEquals("a", "stinger")
- .end()
+ .startOr()
+ .isNull("x")
+ .between("y", 10, 20)
+ .in("z", 1, 2, 3)
+ .nullSafeEquals("a", "stinger")
+ .end()
.end()
.build();
assertEquals("leaf-0 = (IS_NULL x)\n" +
@@ -2816,6 +2874,11 @@ public void testBuilder() throws Exception {
"leaf-2 = (IN z 1 2 3)\n" +
"leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" +
"expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString());
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), " +
+ "not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
+ assertEquals(expected, p.toString());
}
@Test
@@ -2823,9 +2886,9 @@ public void testBuilderComplexTypes() throws Exception {
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
- .lessThan("x", new DateWritable(10))
- .lessThanEquals("y", new HiveChar("hi", 10))
- .equals("z", HiveDecimal.create("1.0"))
+ .lessThan("x", new DateWritable(10))
+ .lessThanEquals("y", new HiveChar("hi", 10))
+ .equals("z", HiveDecimal.create("1.0"))
.end()
.build();
assertEquals("leaf-0 = (LESS_THAN x 1970-01-11)\n" +
@@ -2835,12 +2898,12 @@ public void testBuilderComplexTypes() throws Exception {
sarg = SearchArgumentFactory.newBuilder()
.startNot()
- .startOr()
- .isNull("x")
- .between("y", HiveDecimal.create(10), 20.0)
- .in("z", (byte)1, (short)2, (int)3)
- .nullSafeEquals("a", new HiveVarchar("stinger", 100))
- .end()
+ .startOr()
+ .isNull("x")
+ .between("y", HiveDecimal.create(10), 20.0)
+ .in("z", (byte) 1, (short) 2, (int) 3)
+ .nullSafeEquals("a", new HiveVarchar("stinger", 100))
+ .end()
.end()
.build();
assertEquals("leaf-0 = (IS_NULL x)\n" +
@@ -2848,6 +2911,11 @@ public void testBuilderComplexTypes() throws Exception {
"leaf-2 = (IN z 1 2 3)\n" +
"leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" +
"expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString());
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
+ "not(eq(a, Binary{\"stinger\"})))";
+ assertEquals(expected, p.toString());
}
@Test
@@ -2870,7 +2938,7 @@ public void testBuilderComplexTypes2() throws Exception {
.startOr()
.isNull("x")
.between("y", new BigDecimal(10), 20.0)
- .in("z", (byte)1, (short)2, (int)3)
+ .in("z", (byte) 1, (short) 2, (int) 3)
.nullSafeEquals("a", new HiveVarchar("stinger", 100))
.end()
.end()
@@ -2880,6 +2948,11 @@ public void testBuilderComplexTypes2() throws Exception {
"leaf-2 = (IN z 1 2 3)\n" +
"leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" +
"expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString());
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
+ "not(eq(a, Binary{\"stinger\"})))";
+ assertEquals(expected, p.toString());
}
@Test
@@ -2900,5 +2973,10 @@ public void testBuilderFloat() throws Exception {
"leaf-3 = (EQUALS z 0.22)\n" +
"leaf-4 = (EQUALS z1 0.22)\n" +
"expr = (and leaf-0 leaf-1 leaf-2 leaf-3 leaf-4)", sarg.toString());
+
+ FilterPredicate p = sarg.toFilterPredicate();
+ String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)), lteq(y, Binary{\"hi\"})), eq(z, " +
+ "0.22)), eq(z1, 0.22))";
+ assertEquals(expected, p.toString());
}
}
diff --git serde/pom.xml serde/pom.xml
index 98e5506..8c60b30 100644
--- serde/pom.xml
+++ serde/pom.xml
@@ -75,6 +75,11 @@
opencsv
${opencsv.version}
+
+ com.twitter
+ parquet-hadoop-bundle
+ ${parquet.version}
+
diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
index 616c6db..0076220 100644
--- serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
+++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
@@ -43,7 +43,8 @@
* The possible types for sargs.
*/
public static enum Type {
- INTEGER, // all of the integer types
+ INTEGER, // all of the integer types except long
+ LONG,
FLOAT, // float and double
STRING, // string, char, varchar
DATE,
@@ -58,9 +59,14 @@
public Operator getOperator();
/**
- * Get the type of the column and literal.
+ * Get the type of the column and literal for ORC.
*/
- public Type getType();
+ public Type getOrcType();
+
+ /**
+ * Get the type of the column and literal for parquet.
+ */
+ public Type getParquetType();
/**
* Get the simple column name.
@@ -69,14 +75,26 @@
public String getColumnName();
/**
- * Get the literal half of the predicate leaf.
+ * Get the literal half of the predicate leaf. Adapt the original type for what orc needs
* @return a Long, Double, or String
*/
- public Object getLiteral();
+ public Object getOrcLiteral();
+
+ /**
+ * Get the literal half of the predicate leaf. Adapt the original type for what parquet needs
+ * @return an Integer, Long, Double, or String
+ */
+ public Object getParquetLiteral();
/**
* For operators with multiple literals (IN and BETWEEN), get the literals.
* @return the list of literals (Longs, Doubles, or Strings)
*/
public List getLiteralList();
+
+ /**
+ * For operators with multiple literals (IN and BETWEEN), get the literals adapted for parquet.
+ * @return the list of literals (Ints, Longs, Doubles, or Strings)
+ */
+ public List getParquetLiteralList();
}
diff --git serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
index db0f014..3642fdb 100644
--- serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
+++ serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.io.sarg;
+import parquet.filter2.predicate.FilterPredicate;
+
import java.util.List;
/**
@@ -175,6 +177,12 @@ public boolean isNeeded() {
public String toKryo();
/**
+ * Translate the search argument into the filter predicate used by parquet.
+ * @return the parquet FilterPredicate; may be null (e.g. when there are no leaves)
+ */
+ public FilterPredicate toFilterPredicate();
+
+ /**
* A builder object for contexts outside of Hive where it isn't easy to
* get a ExprNodeDesc. The user must call startOr, startAnd, or startNot
* before adding any leaves.