diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index d189dde..4d58cfc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -809,10 +809,10 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred] + 1]; - Object minValue = getMin(stats); - Object maxValue = getMax(stats); - truthValues[pred] = RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred), - minValue, maxValue); + Object minValue = RecordReaderImpl.getMin(stats); + Object maxValue = RecordReaderImpl.getMax(stats); + PredicateLeaf predLeaf = predLeaves.get(pred); + truthValues[pred] = RecordReaderImpl.evaluatePredicateRange(predLeaf, minValue, maxValue); } else { // parition column case. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 173fb8d..c774a76 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -28,12 +28,15 @@ import java.util.List; import java.util.Map; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -1998,57 +2001,47 @@ private static TreeReader createTreeReader(Path path, } /** - * Get the minimum value out of an index entry. - * @param index the index entry - * @return the object for the minimum value or null if there isn't one + * Get the maximum value out of an index entry. 
+ * @param index + * the index entry + * @return the object for the maximum value or null if there isn't one */ - static Object getMin(OrcProto.ColumnStatistics index) { - if (index.hasIntStatistics()) { - OrcProto.IntegerStatistics stat = index.getIntStatistics(); - if (stat.hasMinimum()) { - return stat.getMinimum(); - } - } - if (index.hasStringStatistics()) { - OrcProto.StringStatistics stat = index.getStringStatistics(); - if (stat.hasMinimum()) { - return stat.getMinimum(); - } - } - if (index.hasDoubleStatistics()) { - OrcProto.DoubleStatistics stat = index.getDoubleStatistics(); - if (stat.hasMinimum()) { - return stat.getMinimum(); - } + static Object getMax(ColumnStatistics index) { + if (index instanceof IntegerColumnStatistics) { + return ((IntegerColumnStatistics) index).getMaximum(); + } else if (index instanceof DoubleColumnStatistics) { + return ((DoubleColumnStatistics) index).getMaximum(); + } else if (index instanceof StringColumnStatistics) { + return ((StringColumnStatistics) index).getMaximum(); + } else if (index instanceof DateColumnStatistics) { + return ((DateColumnStatistics) index).getMaximum(); + } else if (index instanceof DecimalColumnStatistics) { + return ((DecimalColumnStatistics) index).getMaximum(); + } else { + return null; } - return null; } /** - * Get the maximum value out of an index entry. - * @param index the index entry - * @return the object for the maximum value or null if there isn't one + * Get the minimum value out of an index entry. 
+ * @param index + * the index entry + * @return the object for the minimum value or null if there isn't one */ - static Object getMax(OrcProto.ColumnStatistics index) { - if (index.hasIntStatistics()) { - OrcProto.IntegerStatistics stat = index.getIntStatistics(); - if (stat.hasMaximum()) { - return stat.getMaximum(); - } - } - if (index.hasStringStatistics()) { - OrcProto.StringStatistics stat = index.getStringStatistics(); - if (stat.hasMaximum()) { - return stat.getMaximum(); - } - } - if (index.hasDoubleStatistics()) { - OrcProto.DoubleStatistics stat = index.getDoubleStatistics(); - if (stat.hasMaximum()) { - return stat.getMaximum(); - } + static Object getMin(ColumnStatistics index) { + if (index instanceof IntegerColumnStatistics) { + return ((IntegerColumnStatistics) index).getMinimum(); + } else if (index instanceof DoubleColumnStatistics) { + return ((DoubleColumnStatistics) index).getMinimum(); + } else if (index instanceof StringColumnStatistics) { + return ((StringColumnStatistics) index).getMinimum(); + } else if (index instanceof DateColumnStatistics) { + return ((DateColumnStatistics) index).getMinimum(); + } else if (index instanceof DecimalColumnStatistics) { + return ((DecimalColumnStatistics) index).getMinimum(); + } else { + return null; } - return null; } /** @@ -2061,7 +2054,8 @@ static Object getMax(OrcProto.ColumnStatistics index) { */ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index, PredicateLeaf predicate) { - Object minValue = getMin(index); + ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index); + Object minValue = getMin(cs); // if we didn't have any values, everything must have been null if (minValue == null) { if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) { @@ -2070,13 +2064,27 @@ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index, return TruthValue.NULL; } } - Object maxValue = getMax(index); + Object maxValue = getMax(cs); return evaluatePredicateRange(predicate, 
minValue, maxValue); } - static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValue, - Object maxValue) { + static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, + Object max) { Location loc; + Object predObj = predicate.getLiteral(); + + // column statistics for char/varchar columns are stored as strings, so convert char/varchar + // type predicates to string + if (predObj instanceof HiveChar) { + Object hObj = (HiveChar) predObj; + predObj = hObj.toString(); + } else if (predObj instanceof HiveVarchar) { + Object hObj = (HiveVarchar) predObj; + predObj = hObj.toString(); + } + Object minValue = getPrimitiveObject(predObj, min); + Object maxValue = getPrimitiveObject(predObj, max); + switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: loc = compareToRange((Comparable) predicate.getLiteral(), @@ -2163,6 +2171,38 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValu } } + private static Object getPrimitiveObject(Object predObj, Object obj) { + if (obj instanceof DateWritable) { + DateWritable dobj = (DateWritable) obj; + if (predObj instanceof String || predObj instanceof HiveChar + || predObj instanceof HiveVarchar) { + return dobj.toString(); + } + } else if (obj instanceof HiveDecimal) { + HiveDecimal hdObj = (HiveDecimal) obj; + if (predObj instanceof Float) { + return hdObj.floatValue(); + } else if (predObj instanceof Double) { + return hdObj.doubleValue(); + } else if (predObj instanceof Short) { + return hdObj.shortValue(); + } else if (predObj instanceof Integer) { + return hdObj.intValue(); + } else if (predObj instanceof Long) { + return hdObj.longValue(); + } else if (predObj instanceof String || predObj instanceof HiveChar + || predObj instanceof HiveVarchar) { + // primitive type of char/varchar is Text (i.e. a string with trailing whitespace trimmed) + return StringUtils.stripEnd(hdObj.toString(), null); + } + } else if (obj instanceof String || obj instanceof HiveChar || obj
instanceof HiveVarchar) { + // primitive type of char/varchar is Text (i.e. a string with trailing whitespace trimmed) + return StringUtils.stripEnd(obj.toString(), null); + } + + return obj; + } + /** * Pick the row groups that we need to load from the current stripe. * @return an array with a boolean for each row group or null if all of the diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java index 490e92c..922b99f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java @@ -45,7 +45,11 @@ public static enum Type { INTEGER, // all of the integer types FLOAT, // float and double - STRING + STRING, + DATE, + DECIMAL, + CHAR, + VARCHAR } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index 1663d78..4f26d3f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -283,11 +283,19 @@ Operator getOperator() { case INT: case LONG: return PredicateLeaf.Type.INTEGER; + case CHAR: + return PredicateLeaf.Type.CHAR; + case VARCHAR: + return PredicateLeaf.Type.VARCHAR; case STRING: return PredicateLeaf.Type.STRING; case FLOAT: case DOUBLE: return PredicateLeaf.Type.FLOAT; + case DATE: + return PredicateLeaf.Type.DATE; + case DECIMAL: + return PredicateLeaf.Type.DECIMAL; default: } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java index 1762073..f10b872 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java @@ -18,21 +18,21 @@ package org.apache.hadoop.hive.ql.io.orc; -import
org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.ql.io.sarg.TestSearchArgumentImpl; -import org.junit.Test; - -import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.Location; - -import java.util.ArrayList; -import java.util.List; - import static junit.framework.Assert.assertEquals; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.Location; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; +import org.apache.hadoop.hive.ql.io.sarg.TestSearchArgumentImpl; +import org.junit.Test; + public class TestRecordReaderImpl { @Test @@ -76,25 +76,44 @@ public void testCompareToRangeString() throws Exception { } @Test + public void testCompareToCharNeedConvert() throws Exception { + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange("apple", "hello", "world")); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange("zombie", "hello", "world")); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange("hello", "hello", "world")); + assertEquals(Location.MIDDLE, + RecordReaderImpl.compareToRange("pilot", "hello", "world")); + assertEquals(Location.MAX, + RecordReaderImpl.compareToRange("world", "hello", "world")); + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange("apple", "hello", "hello")); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange("hello", "hello", "hello")); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange("zombie", "hello", "hello")); + } + + @Test public void testGetMin() throws Exception { - assertEquals(null, RecordReaderImpl.getMin(createIntStats(null, null))); - 
assertEquals(10L, RecordReaderImpl.getMin(createIntStats(10L, 100L))); - assertEquals(null, RecordReaderImpl.getMin( - OrcProto.ColumnStatistics.newBuilder() - .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder().build()) - .build())); - assertEquals(10.0d, RecordReaderImpl.getMin( + assertEquals(10L, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L)))); + assertEquals(10.0d, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder() - .setMinimum(10.0d).setMaximum(100.0d).build()).build())); - assertEquals(null, RecordReaderImpl.getMin( + .setMinimum(10.0d).setMaximum(100.0d).build()).build()))); + assertEquals(null, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setStringStatistics(OrcProto.StringStatistics.newBuilder().build()) - .build())); - assertEquals("a", RecordReaderImpl.getMin( + .build()))); + assertEquals("a", RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setStringStatistics(OrcProto.StringStatistics.newBuilder() - .setMinimum("a").setMaximum("b").build()).build())); + .setMinimum("a").setMaximum("b").build()).build()))); + assertEquals("hello", RecordReaderImpl.getMin(ColumnStatisticsImpl + .deserialize(createStringStats("hello", "world")))); + assertEquals(HiveDecimal.create("111.1"), RecordReaderImpl.getMin(ColumnStatisticsImpl + .deserialize(createDecimalStats("111.1", "112.1")))); } private static OrcProto.ColumnStatistics createIntStats(Long min, @@ -111,26 +130,39 @@ public void testGetMin() throws Exception { .setIntStatistics(intStats.build()).build(); } + private static OrcProto.ColumnStatistics createStringStats(String min, String max) { + OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder(); + strStats.setMinimum(min); + strStats.setMaximum(max); + return 
OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build()).build(); + } + + private static OrcProto.ColumnStatistics createDecimalStats(String min, String max) { + OrcProto.DecimalStatistics.Builder decStats = OrcProto.DecimalStatistics.newBuilder(); + decStats.setMinimum(min); + decStats.setMaximum(max); + return OrcProto.ColumnStatistics.newBuilder().setDecimalStatistics(decStats.build()).build(); + } + @Test public void testGetMax() throws Exception { - assertEquals(null, RecordReaderImpl.getMax(createIntStats(null, null))); - assertEquals(100L, RecordReaderImpl.getMax(createIntStats(10L, 100L))); - assertEquals(null, RecordReaderImpl.getMax( - OrcProto.ColumnStatistics.newBuilder() - .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder().build()) - .build())); - assertEquals(100.0d, RecordReaderImpl.getMax( + assertEquals(100L, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L)))); + assertEquals(100.0d, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder() - .setMinimum(10.0d).setMaximum(100.0d).build()).build())); - assertEquals(null, RecordReaderImpl.getMax( + .setMinimum(10.0d).setMaximum(100.0d).build()).build()))); + assertEquals(null, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setStringStatistics(OrcProto.StringStatistics.newBuilder().build()) - .build())); - assertEquals("b", RecordReaderImpl.getMax( + .build()))); + assertEquals("b", RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setStringStatistics(OrcProto.StringStatistics.newBuilder() - .setMinimum("a").setMaximum("b").build()).build())); + .setMinimum("a").setMaximum("b").build()).build()))); + assertEquals("world", RecordReaderImpl.getMax(ColumnStatisticsImpl + .deserialize(createStringStats("hello", "world")))); + 
assertEquals(HiveDecimal.create("112.1"), RecordReaderImpl.getMax(ColumnStatisticsImpl + .deserialize(createDecimalStats("111.1", "112.1")))); } @Test @@ -150,6 +182,37 @@ public void testEquals() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred)); assertEquals(TruthValue.YES_NULL, RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.EQUALS, + PredicateLeaf.Type.CHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.EQUALS, + PredicateLeaf.Type.VARCHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + } @Test @@ 
-169,6 +232,36 @@ public void testNullSafeEquals() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred)); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.CHAR, "x", "hello", null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicate(createStringStats("world", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("hello", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "hello"), pred)); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "banana"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("hello", "hello"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.VARCHAR, "x", "hello", null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicate(createStringStats("world", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("hello", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "hello"), pred)); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "banana"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("hello", "hello"), pred)); } @Test @@ -186,6 +279,36 @@ public void testLessThan() throws Exception { 
RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), lessThan)); assertEquals(TruthValue.YES_NULL, RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), lessThan)); + + PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.CHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN, + PredicateLeaf.Type.VARCHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); } @Test @@ -203,6 +326,36 @@ public void testLessThanEquals() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred)); assertEquals(TruthValue.YES_NULL, 
RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, + PredicateLeaf.Type.CHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, + PredicateLeaf.Type.VARCHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); } @Test @@ -221,6 +374,39 @@ public void testIn() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred)); assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred)); + + args.clear(); + args.add("a"); + args.add("b"); + pred = 
TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.CHAR, "x", null, args); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.VARCHAR, "x", null, args); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); } @Test @@ -245,6 +431,39 @@ public void testBetween() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(10L, 20L), pred)); assertEquals(TruthValue.YES_NULL, RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred)); + + args.clear(); + args.add("a"); + args.add("b"); + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.BETWEEN, + PredicateLeaf.Type.CHAR, "x", null, 
args); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.BETWEEN, + PredicateLeaf.Type.VARCHAR, "x", null, args); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); } @Test diff --git ql/src/test/queries/clientpositive/orc_ppd_char.q ql/src/test/queries/clientpositive/orc_ppd_char.q new file mode 100644 index 0000000..1f5f54a --- /dev/null +++ ql/src/test/queries/clientpositive/orc_ppd_char.q @@ -0,0 +1,76 @@ +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=5000; + +create table newtypesorc(c char(10), v varchar(10), d decimal(5,3), da date) stored as orc tblproperties("orc.stripe.size"="16777216"); 
+ +insert overwrite table newtypesorc select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl; + +set hive.optimize.index.filter=false; + +-- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) +select sum(hash(*)) from newtypesorc where c="apple"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c="apple"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c!="apple"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c!="apple"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c<"hello"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c<"hello"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c<="hello"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c<="hello"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c="apple "; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c="apple "; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c in ("apple", "carrot"); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c in ("apple", "carrot"); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c in ("apple", "hello"); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c in ("apple", "hello"); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c in ("carrot"); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c in ("carrot"); + +set 
hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c between "apple" and "carrot"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c between "apple" and "carrot"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c between "apple" and "zombie"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c between "apple" and "zombie"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c between "carrot" and "carrot1"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c between "carrot" and "carrot1"; + diff --git ql/src/test/queries/clientpositive/orc_ppd_date.q ql/src/test/queries/clientpositive/orc_ppd_date.q new file mode 100644 index 0000000..c34be86 --- /dev/null +++ ql/src/test/queries/clientpositive/orc_ppd_date.q @@ -0,0 +1,97 @@ +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=5000; + +create table newtypesorc(c char(10), v varchar(10), d decimal(5,3), da date) stored as orc tblproperties("orc.stripe.size"="16777216"); + +insert overwrite table newtypesorc select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl; + +-- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) +select sum(hash(*)) from newtypesorc where da='1970-02-20'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da='1970-02-20'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da=cast('1970-02-20' as date); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da=cast('1970-02-20' as date); + +set 
hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da=cast('1970-02-20' as varchar(20)); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da=cast('1970-02-20' as varchar(20)); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da!='1970-02-20'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da!='1970-02-20'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<'1970-02-27'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<'1970-02-27'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<'1970-02-29'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<'1970-02-29'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<'1970-02-15'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<'1970-02-15'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<='1970-02-20'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<='1970-02-20'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<='1970-02-27'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<='1970-02-27'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-21' as date), cast('1970-02-27' as date)); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-21' as date), cast('1970-02-27' as date)); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-20' as date), cast('1970-02-27' as date)); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da in 
(cast('1970-02-20' as date), cast('1970-02-27' as date)); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-21' as date), cast('1970-02-22' as date)); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-21' as date), cast('1970-02-22' as date)); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da between '1970-02-19' and '1970-02-22'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da between '1970-02-19' and '1970-02-22'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da between '1970-02-19' and '1970-02-28'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da between '1970-02-19' and '1970-02-28'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da between '1970-02-18' and '1970-02-19'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da between '1970-02-18' and '1970-02-19'; diff --git ql/src/test/queries/clientpositive/orc_ppd_decimal.q ql/src/test/queries/clientpositive/orc_ppd_decimal.q new file mode 100644 index 0000000..a93590e --- /dev/null +++ ql/src/test/queries/clientpositive/orc_ppd_decimal.q @@ -0,0 +1,151 @@ +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=5000; + +create table newtypesorc(c char(10), v varchar(10), d decimal(5,3), da date) stored as orc tblproperties("orc.stripe.size"="16777216"); + +insert overwrite table newtypesorc select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl; + +-- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, 
LESS_THAN_EQUALS, IN, BETWEEN tests) +select sum(hash(*)) from newtypesorc where d=0.22; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d=0.22; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d='0.22'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d='0.22'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d=cast('0.22' as float); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d=cast('0.22' as float); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d!=0.22; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d!=0.22; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d!='0.22'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d!='0.22'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d!=cast('0.22' as float); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d!=cast('0.22' as float); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d<11.22; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d<11.22; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d<'11.22'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d<'11.22'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d