diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index bde9fc27a87b78c5478dabc8b01e995b868e5563..293a914ebea2787dd10c7752bc516c4fb3552f6b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -30,7 +30,6 @@ import java.util.List; import java.util.Map; -import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -280,9 +279,9 @@ static Object getMax(ColumnStatistics index) { return ((TimestampColumnStatistics) index).getMaximum(); } else if (index instanceof BooleanColumnStatistics) { if (((BooleanColumnStatistics)index).getTrueCount()!=0) { - return "true"; + return Boolean.TRUE; } else { - return "false"; + return Boolean.FALSE; } } else { return null; @@ -310,9 +309,9 @@ static Object getMin(ColumnStatistics index) { return ((TimestampColumnStatistics) index).getMinimum(); } else if (index instanceof BooleanColumnStatistics) { if (((BooleanColumnStatistics)index).getFalseCount()!=0) { - return "false"; + return Boolean.FALSE; } else { - return "true"; + return Boolean.TRUE; } } else { return null; @@ -367,18 +366,12 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, } TruthValue result; - // Predicate object and stats object can be one of the following base types - // LONG, DOUBLE, STRING, DATE, DECIMAL - // Out of these DATE is not implicitly convertible to other types and rest - // others are implicitly convertible. In cases where DATE cannot be converted - // the stats object is converted to text and comparison is performed. - // When STRINGs are converted to other base types, NumberFormat exception - // can occur in which case TruthValue.YES_NO_NULL value is returned try { - Object baseObj = predicate.getLiteral(PredicateLeaf.FileFormat.ORC); - Object minValue = getConvertedStatsObj(min, baseObj); - Object maxValue = getConvertedStatsObj(max, baseObj); - Object predObj = getBaseObjectForComparison(baseObj, minValue); + // Predicate object and stats objects are converted to the type of the predicate object. + Object baseObj = predicate.getLiteral(); + Object minValue = getBaseObjectForComparison(predicate.getType(), min); + Object maxValue = getBaseObjectForComparison(predicate.getType(), max); + Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj); result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull); if (bloomFilter != null && result != TruthValue.NO_NULL && result != TruthValue.NO) { @@ -440,8 +433,8 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec if (minValue.equals(maxValue)) { // for a single value, look through to see if that value is in the // set - for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) { - predObj = getBaseObjectForComparison(arg, minValue); + for (Object arg : predicate.getLiteralList()) { + predObj = getBaseObjectForComparison(predicate.getType(), arg); loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.MIN) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; @@ -450,8 +443,8 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.NO_NULL : TruthValue.NO; } else { // are all of the values outside of the range? - for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) { - predObj = getBaseObjectForComparison(arg, minValue); + for (Object arg : predicate.getLiteralList()) { + predObj = getBaseObjectForComparison(predicate.getType(), arg); loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.MIN || loc == Location.MIDDLE || loc == Location.MAX) { @@ -461,12 +454,12 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.NO_NULL : TruthValue.NO; } case BETWEEN: - List args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC); - Object predObj1 = getBaseObjectForComparison(args.get(0), minValue); + List args = predicate.getLiteralList(); + Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0)); loc = compareToRange((Comparable) predObj1, minValue, maxValue); if (loc == Location.BEFORE || loc == Location.MIN) { - Object predObj2 = getBaseObjectForComparison(args.get(1), minValue); + Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1)); Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue); if (loc2 == Location.AFTER || loc2 == Location.MAX) { @@ -489,8 +482,8 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec } } - private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate, Object predObj, - BloomFilter bloomFilter, boolean hasNull) { + private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate, + final Object predObj, BloomFilter bloomFilter, boolean hasNull) { switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: // null safe equals does not return *_NULL variant. So set hasNull to false @@ -498,9 +491,10 @@ private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate, case EQUALS: return checkInBloomFilter(bloomFilter, predObj, hasNull); case IN: - for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) { + for (Object arg : predicate.getLiteralList()) { // if atleast one value in IN list exist in bloom filter, qualify the row group/stripe - TruthValue result = checkInBloomFilter(bloomFilter, arg, hasNull); + Object predObjItem = getBaseObjectForComparison(predicate.getType(), arg); + TruthValue result = checkInBloomFilter(bloomFilter, predObjItem, hasNull); if (result == TruthValue.YES_NO_NULL || result == TruthValue.YES_NO) { return result; } @@ -527,14 +521,6 @@ private static TruthValue checkInBloomFilter(BloomFilter bf, Object predObj, boo if (bf.testString(predObj.toString())) { result = TruthValue.YES_NO_NULL; } - } else if (predObj instanceof Date) { - if (bf.testLong(DateWritable.dateToDays((Date) predObj))) { - result = TruthValue.YES_NO_NULL; - } - } else if (predObj instanceof DateWritable) { - if (bf.testLong(((DateWritable) predObj).getDays())) { - result = TruthValue.YES_NO_NULL; - } } else if (predObj instanceof Timestamp) { if (bf.testLong(((Timestamp) predObj).getTime())) { result = TruthValue.YES_NO_NULL; @@ -543,14 +529,22 @@ private static TruthValue checkInBloomFilter(BloomFilter bf, Object predObj, boo if (bf.testLong(((TimestampWritable) predObj).getTimestamp().getTime())) { result = TruthValue.YES_NO_NULL; } - } else { - // if the predicate object is null and if hasNull says there are no nulls then return NO - if (predObj == null && !hasNull) { - result = TruthValue.NO; - } else { + } else if (predObj instanceof Date) { + if (bf.testLong(DateWritable.dateToDays((Date) predObj))) { result = TruthValue.YES_NO_NULL; } - } + } else if (predObj instanceof DateWritable) { + if (bf.testLong(((DateWritable) predObj).getDays())) { + result = TruthValue.YES_NO_NULL; + } + } else { + // if the predicate object is null and if hasNull says there are no nulls then return NO + if (predObj == null && !hasNull) { + result = TruthValue.NO; + } else { + result = TruthValue.YES_NO_NULL; + } + } if (result == TruthValue.YES_NO_NULL && !hasNull) { result = TruthValue.YES_NO; @@ -563,58 +557,112 @@ private static TruthValue checkInBloomFilter(BloomFilter bf, Object predObj, boo return result; } - private static Object getBaseObjectForComparison(Object predObj, Object statsObj) { - if (predObj != null) { - if (predObj instanceof ExprNodeConstantDesc) { - predObj = ((ExprNodeConstantDesc) predObj).getValue(); + private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object obj) { + if (obj != null) { + if (obj instanceof ExprNodeConstantDesc) { + obj = ((ExprNodeConstantDesc) obj).getValue(); } - // following are implicitly convertible - if (statsObj instanceof Long) { - if (predObj instanceof Double) { - return ((Double) predObj).longValue(); - } else if (predObj instanceof HiveDecimal) { - return ((HiveDecimal) predObj).longValue(); - } else if (predObj instanceof String) { - return Long.valueOf(predObj.toString()); + } else { + return null; + } + switch (type) { + case BOOLEAN: + if (obj instanceof Boolean) { + return obj; + } else { + // will only be true if the string conversion yields "true", all other values are + // considered false + return Boolean.valueOf(obj.toString()); } - } else if (statsObj instanceof Double) { - if (predObj instanceof Long) { - return ((Long) predObj).doubleValue(); - } else if (predObj instanceof HiveDecimal) { - return ((HiveDecimal) predObj).doubleValue(); - } else if (predObj instanceof String) { - return Double.valueOf(predObj.toString()); + case DATE: + if (obj instanceof Date) { + return obj; + } else if (obj instanceof DateWritable) { + return ((DateWritable) obj).get(); + } else if (obj instanceof String) { + return Date.valueOf((String) obj); + } else if (obj instanceof Timestamp) { + return DateWritable.timeToDate(((Timestamp) obj).getTime() / 1000L).toString(); } - } else if (statsObj instanceof String) { - return predObj.toString(); - } else if (statsObj instanceof HiveDecimal) { - if (predObj instanceof Long) { - return HiveDecimal.create(((Long) predObj)); - } else if (predObj instanceof Double) { - return HiveDecimal.create(predObj.toString()); - } else if (predObj instanceof String) { - return HiveDecimal.create(predObj.toString()); - } else if (predObj instanceof BigDecimal) { - return HiveDecimal.create((BigDecimal)predObj); + // always string, but prevent the comparison to numbers (are they days/seconds/milliseconds?) + break; + case DECIMAL: + if (obj instanceof Boolean) { + return ((Boolean) obj).booleanValue() ? HiveDecimal.ONE : HiveDecimal.ZERO; + } else if (obj instanceof Integer) { + return HiveDecimal.create(((Integer) obj).intValue()); + } else if (obj instanceof Long) { + return HiveDecimal.create(((Long) obj)); + } else if (obj instanceof Float || obj instanceof Double || + obj instanceof String || obj instanceof Timestamp) { + return HiveDecimal.create(obj.toString()); + } else if (obj instanceof BigDecimal) { + return HiveDecimal.create((BigDecimal) obj); + } else if (obj instanceof HiveDecimal) { + return obj; } - } - } - return predObj; - } - - private static Object getConvertedStatsObj(Object statsObj, Object predObj) { - - // converting between date and other types is not implicit, so convert to - // text for comparison - if (((predObj instanceof DateWritable) && !(statsObj instanceof DateWritable)) - || ((statsObj instanceof DateWritable) && !(predObj instanceof DateWritable))) { - return StringUtils.stripEnd(statsObj.toString(), null); + break; + case FLOAT: + if (obj instanceof Number) { + // widening conversion + return ((Number) obj).doubleValue(); + } else if (obj instanceof HiveDecimal) { + return ((HiveDecimal) obj).doubleValue(); + } else if (obj instanceof String) { + return Double.valueOf(obj.toString()); + } else if (obj instanceof Timestamp) { + return new TimestampWritable((Timestamp)obj).getDouble(); + } else if (obj instanceof HiveDecimal) { + return ((HiveDecimal) obj).doubleValue(); + } else if (obj instanceof BigDecimal) { + return ((BigDecimal) obj).doubleValue(); + } + break; + case INTEGER: + // fall through + case LONG: + if (obj instanceof Number) { + // widening conversion + return ((Number) obj).longValue(); + } else if (obj instanceof HiveDecimal) { + return ((HiveDecimal) obj).longValue(); + } else if (obj instanceof String) { + return Long.valueOf(obj.toString()); + } else if (obj instanceof Timestamp) { + return ((Timestamp) obj).getTime() / 1000L; + } + break; + case STRING: + if (obj != null) { + return (obj.toString()); + } + break; + case TIMESTAMP: + if (obj instanceof Timestamp) { + return obj; + } else if (obj instanceof Float) { + return TimestampWritable.doubleToTimestamp(((Float) obj).doubleValue()); + } else if (obj instanceof Double) { + return TimestampWritable.doubleToTimestamp(((Double) obj).doubleValue()); + } else if (obj instanceof HiveDecimal) { + return TimestampWritable.decimalToTimestamp((HiveDecimal) obj); + } else if (obj instanceof Date) { + return new Timestamp(((Date) obj).getTime()); + } else if (obj instanceof DateWritable) { + return new Timestamp(((DateWritable) obj).get().getTime()); + } + // float/double conversion to timestamp is interpreted as seconds whereas integer conversion + // to timestamp is interpreted as milliseconds by default. The integer to timestamp casting + // is also config driven. The filter operator changes its promotion based on config: + // "int.timestamp.conversion.in.seconds". Disable PPD for integer cases. + break; + default: + break; } - if (statsObj instanceof String) { - return StringUtils.stripEnd(statsObj.toString(), null); - } - return statsObj; + throw new IllegalArgumentException(String.format( + "ORC SARGS could not convert from %s to %s", obj == null ? "(null)" : obj.getClass() + .getSimpleName(), type)); } public static class SargApplier { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index a451bfb339470024074c4e70ea6ef38618a3270c..efe03ab099e741e2c69373c9e81c5cb4db3f581b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -18,9 +18,15 @@ package org.apache.hadoop.hive.ql.io.sarg; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; -import com.esotericsoftware.kryo.io.Output; +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; @@ -54,15 +60,9 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import java.math.BigDecimal; -import java.sql.Timestamp; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Deque; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; import parquet.filter2.predicate.FilterApi; import parquet.filter2.predicate.FilterPredicate; @@ -116,55 +116,12 @@ public String getColumnName() { } @Override - public Object getLiteral(FileFormat format) { - // To get around a kryo 2.22 bug while deserialize a Timestamp into Date - // (https://github.com/EsotericSoftware/kryo/issues/88) - // When we see a Date, convert back into Timestamp - if (literal instanceof java.util.Date) { - return new Timestamp(((java.util.Date) literal).getTime()); - } - - switch (format) { - case ORC: - // adapt base type to what orc needs - if (literal instanceof Integer) { - return ((Number) literal).longValue(); - } - return literal; - case PARQUET: - return literal; - default: - throw new RuntimeException( - "File format " + format + "is not support to build search arguments"); - } + public Object getLiteral() { + return literal; } @Override - public List getLiteralList(FileFormat format) { - switch (format) { - case ORC: - return getOrcLiteralList(); - case PARQUET: - return getParquetLiteralList(); - default: - throw new RuntimeException("File format is not support to build search arguments"); - } - } - - private List getOrcLiteralList() { - // no need to cast - if (literalList == null || literalList.size() == 0 || !(literalList.get(0) instanceof - Integer)) { - return literalList; - } - List result = new ArrayList(literalList.size()); - for (Object o : literalList) { - result.add(((Number) o).longValue()); - } - return result; - } - - private List getParquetLiteralList() { + public List getLiteralList() { return literalList; } @@ -350,13 +307,17 @@ private FilterPredicate buildFilterPredicateFromPredicateLeaf(PredicateLeaf leaf try { builder = leafFilterFactory .getLeafFilterBuilderByType(leaf.getType()); - if (builder == null) return null; + if (builder == null) { + return null; + } if (isMultiLiteralsOperator(leaf.getOperator())) { - return builder.buildPredicate(leaf.getOperator(), leaf.getLiteralList( - PredicateLeaf.FileFormat.PARQUET), leaf.getColumnName()); + return builder.buildPredicate(leaf.getOperator(), + leaf.getLiteralList(), + leaf.getColumnName()); } else { return builder - .buildPredict(leaf.getOperator(), leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET), + .buildPredict(leaf.getOperator(), + leaf.getLiteral(), leaf.getColumnName()); } } catch (Exception e) { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java index a50def12119c5fb38176b7659c40daf4467673ed..83f99ea2552d83d86c9b9711f0eddb4fd07a1c36 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java @@ -347,12 +347,12 @@ public void testPredEvalWithIntStats() throws Exception { pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); - assertEquals(TruthValue.YES_NO, + assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); - assertEquals(TruthValue.NO, + assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, @@ -375,12 +375,12 @@ public void testPredEvalWithDoubleStats() throws Exception { pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); - assertEquals(TruthValue.YES_NO, + assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); - assertEquals(TruthValue.NO, + assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, @@ -408,7 +408,7 @@ public void testPredEvalWithStringStats() throws Exception { pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(100), null); - assertEquals(TruthValue.NO, + assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, @@ -421,12 +421,12 @@ public void testPredEvalWithStringStats() throws Exception { public void testPredEvalWithDateStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); - assertEquals(TruthValue.NO, + assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); - assertEquals(TruthValue.NO, + assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, @@ -471,7 +471,7 @@ public void testPredEvalWithDateStats() throws Exception { pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); - assertEquals(TruthValue.NO, + assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); } @@ -490,12 +490,12 @@ public void testPredEvalWithDecimalStats() throws Exception { pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); - assertEquals(TruthValue.YES_NO, + assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); - assertEquals(TruthValue.NO, + assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, @@ -1313,7 +1313,7 @@ public void testDateWritableNullSafeEqualsBloomFilter() throws Exception { assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); bf.addLong((new DateWritable(15)).getDays()); - assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); + assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); } @Test @@ -1328,7 +1328,7 @@ public void testDateWritableEqualsBloomFilter() throws Exception { assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); bf.addLong((new DateWritable(15)).getDays()); - assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); + assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); } @Test @@ -1347,10 +1347,10 @@ public void testDateWritableInBloomFilter() throws Exception { assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); bf.addLong((new DateWritable(19)).getDays()); - assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); + assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); bf.addLong((new DateWritable(15)).getDays()); - assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); + assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java index 22eea0b55cf2c53eef69a1ab62acb315fab8d7d9..5e61abafab86d1b5332f9f36fc4365eb5447d815 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java @@ -802,64 +802,55 @@ public void testExpression1() throws Exception { assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("john", leaf.getLiteral()); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("greg", leaf.getLiteral()); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("alan", leaf.getLiteral()); leaf = leaves.get(3); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(12, leaf.getLiteral()); leaf = leaves.get(4); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(13, leaf.getLiteral()); leaf = leaves.get(5); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(15, leaf.getLiteral()); leaf = leaves.get(6); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(16L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(16, leaf.getLiteral()); leaf = leaves.get(7); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(30L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(30, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(30, leaf.getLiteral()); leaf = leaves.get(8); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("owen", leaf.getLiteral()); assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + " (not leaf-4) leaf-5 leaf-6 leaf-7)" + @@ -1090,31 +1081,26 @@ public void testExpression2() throws Exception { assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); - assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC)); - assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(null, leaf.getLiteral()); + assertEquals(null, leaf.getLiteralList()); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("sue", leaf.getLiteral()); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(12, leaf.getLiteral()); leaf = leaves.get(3); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(4L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(4, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(4, leaf.getLiteral()); assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)", sarg.getExpression().toString()); @@ -1525,26 +1511,21 @@ public void testExpression3() throws Exception { assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); - assertEquals(23L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); - assertEquals(23, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0)); - assertEquals(45L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); - assertEquals(45, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1)); + assertEquals(null, leaf.getLiteral()); + assertEquals(23, leaf.getLiteralList().get(0)); + assertEquals(45, leaf.getLiteralList().get(1)); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("alan", leaf.getLiteral()); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("last_name", leaf.getColumnName()); - assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("smith", leaf.getLiteral()); assertEquals("(and leaf-0 leaf-1 leaf-2)", sarg.getExpression().toString()); @@ -1751,26 +1732,21 @@ id in (34,50) */ assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(12, leaf.getLiteral()); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); - assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); - assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0)); - assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1)); + assertEquals("john", leaf.getLiteralList().get(0)); + assertEquals("sue", leaf.getLiteralList().get(1)); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(34L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); - assertEquals(50L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); - assertEquals(34, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0)); - assertEquals(50, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1)); + assertEquals(34, leaf.getLiteralList().get(0)); + assertEquals(50, leaf.getLiteralList().get(1)); assertEquals("(and (not leaf-0) leaf-1 leaf-2)", sarg.getExpression().toString()); @@ -2016,8 +1992,6 @@ public void testExpression5() throws Exception { assertEquals(PredicateLeaf.Operator.BETWEEN, leaves.get(0).getOperator()); assertEquals("first_name", leaves.get(0).getColumnName()); - assertEquals("david", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); - assertEquals("greg", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); assertEquals("leaf-0", sarg.getExpression().toString()); @@ -2515,64 +2489,55 @@ public void testExpression7() throws Exception { assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(18L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(18, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(18, leaf.getLiteral()); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(10L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(10, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(10, leaf.getLiteral()); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(13, leaf.getLiteral()); leaf = leaves.get(3); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(16L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(16, leaf.getLiteral()); leaf = leaves.get(4); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(11L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(11, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(11, leaf.getLiteral()); leaf = leaves.get(5); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(12, leaf.getLiteral()); leaf = leaves.get(6); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(14L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(14, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(14, leaf.getLiteral()); leaf = leaves.get(7); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(15, leaf.getLiteral()); leaf = leaves.get(8); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(17L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(17, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(17, leaf.getLiteral()); assertEquals("(and" + " (or leaf-0 leaf-1 leaf-2 leaf-3)" + @@ -2917,8 +2882,7 @@ public void testExpression10() throws Exception { assertEquals(PredicateLeaf.Operator.LESS_THAN, leaves.get(0).getOperator()); assertEquals("id", leaves.get(0).getColumnName()); - assertEquals(10L, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(10, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(10, leaves.get(0).getLiteral()); assertEquals("(and (not leaf-0) (not leaf-0))", sarg.getExpression().toString()); diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java index 41a14c20274de7d8d6168d8d99916d416df5a7ef..0a95363f04b32c31b541a84afd57ec10d26bdb55 100644 --- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java +++ b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java @@ -54,14 +54,6 @@ } /** - * file format which supports search arguments - */ - public static enum FileFormat { - ORC, - PARQUET - } - - /** * Get the operator for the leaf. */ public Operator getOperator(); @@ -79,16 +71,17 @@ /** * Get the literal half of the predicate leaf. Adapt the original type for what orc needs - * @return a Long, Double, or String for Orc and a Int, Long, Double, or String for parquet + * + * @return an Integer, Long, Double, or String */ - public Object getLiteral(FileFormat format); + public Object getLiteral(); /** * For operators with multiple literals (IN and BETWEEN), get the literals. * - * @return the list of literals (Longs, Doubles, or Strings) for orc or the list of literals - * (Integer, Longs, Doubles, or String) for parquet + * @return the list of literals (Integer, Longs, Doubles, or Strings) + * */ - public List getLiteralList(FileFormat format); + public List getLiteralList(); }