diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index f2f311f..d61af63 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -22,16 +22,6 @@ import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; -import java.math.BigDecimal; -import java.sql.Timestamp; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Deque; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; @@ -64,6 +54,16 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + import parquet.filter2.predicate.FilterApi; import parquet.filter2.predicate.FilterPredicate; @@ -415,6 +415,8 @@ Operator getOperator() { } static class ExpressionBuilder { + // max threshold for CNF conversion. having >8 elements in andList will be converted to maybe + private static final int CNF_COMBINATIONS_THRESHOLD = 256; private final List leaves = new ArrayList(); /** @@ -844,14 +846,29 @@ static ExpressionTree convertToCNF(ExpressionTree root) { } } if (!andList.isEmpty()) { - root = new ExpressionTree(ExpressionTree.Operator.AND); - generateAllCombinations(root.children, andList, nonAndList); + if (checkCombinationsThreshold(andList)) { + root = new ExpressionTree(ExpressionTree.Operator.AND); + generateAllCombinations(root.children, andList, nonAndList); + } else { + root = new ExpressionTree(TruthValue.YES_NO_NULL); + } } } } return root; } + private static boolean checkCombinationsThreshold(List andList) { + int numComb = 1; + for (ExpressionTree tree : andList) { + numComb *= tree.children.size(); + if (numComb > CNF_COMBINATIONS_THRESHOLD) { + return false; + } + } + return true; + } + /** * Converts multi-level ands and ors into single level ones. * @param root the expression to flatten diff --git ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java index 323fd33..22eea0b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java @@ -18,7 +18,12 @@ package org.apache.hadoop.hive.ql.io.sarg; +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertNull; +import static junit.framework.Assert.assertTrue; + import com.google.common.collect.Sets; + import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; @@ -27,8 +32,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.junit.*; -import parquet.filter2.predicate.FilterPredicate; +import org.junit.Test; import java.beans.XMLDecoder; import java.io.ByteArrayInputStream; @@ -37,9 +41,7 @@ import java.util.List; import java.util.Set; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertNull; -import static junit.framework.Assert.assertTrue; +import parquet.filter2.predicate.FilterPredicate; /** * These test the SARG implementation. @@ -175,6 +177,17 @@ public void testFoldMaybe() throws Exception { assertEquals("(and leaf-1)", ExpressionBuilder.foldMaybe(and(or(leaf(2), constant(TruthValue.YES_NO_NULL)), leaf(1))).toString()); + assertEquals("(and leaf-100)", ExpressionBuilder.foldMaybe( + ExpressionBuilder.convertToCNF(and(leaf(100), + or(and(leaf(0), leaf(1)), + and(leaf(2), leaf(3)), + and(leaf(4), leaf(5)), + and(leaf(6), leaf(7)), + and(leaf(8), leaf(9)), + and(leaf(10), leaf(11)), + and(leaf(12), leaf(13)), + and(leaf(14), leaf(15)), + and(leaf(16), leaf(17)))))).toString()); } @Test @@ -236,6 +249,25 @@ public void testCNF() throws Exception { and(leaf(3), leaf(4), leaf(5)), and(leaf(6), leaf(7)), leaf(8))).toString()); + assertEquals("YES_NO_NULL", ExpressionBuilder.convertToCNF(or(and(leaf(0), leaf(1)), + and(leaf(2), leaf(3)), + and(leaf(4), leaf(5)), + and(leaf(6), leaf(7)), + and(leaf(8), leaf(9)), + and(leaf(10), leaf(11)), + and(leaf(12), leaf(13)), + and(leaf(14), leaf(15)), + and(leaf(16), leaf(17)))).toString()); + assertEquals("(and leaf-100 YES_NO_NULL)", ExpressionBuilder.convertToCNF(and(leaf(100), + or(and(leaf(0), leaf(1)), + and(leaf(2), leaf(3)), + and(leaf(4), leaf(5)), + and(leaf(6), leaf(7)), + and(leaf(8), leaf(9)), + and(leaf(10), leaf(11)), + and(leaf(12), leaf(13)), + and(leaf(14), leaf(15)), + and(leaf(16), leaf(17))))).toString()); assertNoSharedNodes(ExpressionBuilder.convertToCNF(or(and(leaf(0), leaf(1), leaf(2)), and(leaf(3), leaf(4), leaf(5)), and(leaf(6), leaf(7)),