diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index eeb9641..f3d198a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -18,15 +18,9 @@ package org.apache.hadoop.hive.ql.io.sarg; -import java.math.BigDecimal; -import java.sql.Timestamp; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Deque; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; @@ -56,9 +50,15 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; -import com.esotericsoftware.kryo.io.Output; +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * The implementation of SearchArguments. @@ -300,6 +300,8 @@ Operator getOperator() { } static class ExpressionBuilder { + // Upper bound on the number of combinations CNF conversion will expand. 
An OR of more than 8 two-child ANDs (2^8 = 256) exceeds it and is replaced by YES_NO_NULL (maybe). + private static final int CNF_COMBINATIONS_THRESHOLD = 256; private final List leaves = new ArrayList(); /** @@ -725,14 +727,29 @@ static ExpressionTree convertToCNF(ExpressionTree root) { } } if (!andList.isEmpty()) { - root = new ExpressionTree(ExpressionTree.Operator.AND); - generateAllCombinations(root.children, andList, nonAndList); + if (checkCombinationsThreshold(andList)) { + root = new ExpressionTree(ExpressionTree.Operator.AND); + generateAllCombinations(root.children, andList, nonAndList); + } else { + root = new ExpressionTree(TruthValue.YES_NO_NULL); + } } } } return root; } + private static boolean checkCombinationsThreshold(List andList) { + int numComb = 1; + for (ExpressionTree tree : andList) { + numComb *= tree.children.size(); + if (numComb > CNF_COMBINATIONS_THRESHOLD) { + return false; + } + } + return true; + } + /** * Converts multi-level ands and ors into single level ones. * @param root the expression to flatten diff --git ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java index c91644c..6292a6b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java @@ -18,12 +18,14 @@ package org.apache.hadoop.hive.ql.io.sarg; +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertTrue; + import com.google.common.collect.Sets; + import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionBuilder; import 
org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree; @@ -38,9 +40,6 @@ import java.util.List; import java.util.Set; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertTrue; - /** * These test the SARG implementation. * The xml files were generated by setting hive.optimize.index.filter @@ -176,6 +175,17 @@ public void testFoldMaybe() throws Exception { assertEquals("(and leaf-1)", ExpressionBuilder.foldMaybe(and(or(leaf(2), constant(TruthValue.YES_NO_NULL)), leaf(1))).toString()); + assertEquals("(and leaf-100)", ExpressionBuilder.foldMaybe( + ExpressionBuilder.convertToCNF(and(leaf(100), + or(and(leaf(0), leaf(1)), + and(leaf(2), leaf(3)), + and(leaf(4), leaf(5)), + and(leaf(6), leaf(7)), + and(leaf(8), leaf(9)), + and(leaf(10), leaf(11)), + and(leaf(12), leaf(13)), + and(leaf(14), leaf(15)), + and(leaf(16), leaf(17)))))).toString()); } @Test @@ -237,6 +247,25 @@ public void testCNF() throws Exception { and(leaf(3), leaf(4), leaf(5)), and(leaf(6), leaf(7)), leaf(8))).toString()); + assertEquals("YES_NO_NULL", ExpressionBuilder.convertToCNF(or(and(leaf(0), leaf(1)), + and(leaf(2), leaf(3)), + and(leaf(4), leaf(5)), + and(leaf(6), leaf(7)), + and(leaf(8), leaf(9)), + and(leaf(10), leaf(11)), + and(leaf(12), leaf(13)), + and(leaf(14), leaf(15)), + and(leaf(16), leaf(17)))).toString()); + assertEquals("(and leaf-100 YES_NO_NULL)", ExpressionBuilder.convertToCNF(and(leaf(100), + or(and(leaf(0), leaf(1)), + and(leaf(2), leaf(3)), + and(leaf(4), leaf(5)), + and(leaf(6), leaf(7)), + and(leaf(8), leaf(9)), + and(leaf(10), leaf(11)), + and(leaf(12), leaf(13)), + and(leaf(14), leaf(15)), + and(leaf(16), leaf(17))))).toString()); assertNoSharedNodes(ExpressionBuilder.convertToCNF(or(and(leaf(0), leaf(1), leaf(2)), and(leaf(3), leaf(4), leaf(5)), and(leaf(6), leaf(7)),