diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index abdc165..7ad7ecc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -633,10 +633,10 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred] + 1]; - Object minValue = getMin(stats); - Object maxValue = getMax(stats); - truthValues[pred] = RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred), - minValue, maxValue); + Object minValue = RecordReaderImpl.getMin(stats); + Object maxValue = RecordReaderImpl.getMax(stats); + PredicateLeaf predLeaf = predLeaves.get(pred); + truthValues[pred] = RecordReaderImpl.evaluatePredicateRange(predLeaf, minValue, maxValue); } else { // parition column case. 
@@ -649,35 +649,6 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, } return true; } - - private Object getMax(ColumnStatistics index) { - if (index instanceof IntegerColumnStatistics) { - return ((IntegerColumnStatistics) index).getMaximum(); - } else if (index instanceof DoubleColumnStatistics) { - return ((DoubleColumnStatistics) index).getMaximum(); - } else if (index instanceof StringColumnStatistics) { - return ((StringColumnStatistics) index).getMaximum(); - } else if (index instanceof DateColumnStatistics) { - return ((DateColumnStatistics) index).getMaximum(); - } else { - return null; - } - } - - private Object getMin(ColumnStatistics index) { - if (index instanceof IntegerColumnStatistics) { - return ((IntegerColumnStatistics) index).getMinimum(); - } else if (index instanceof DoubleColumnStatistics) { - return ((DoubleColumnStatistics) index).getMinimum(); - } else if (index instanceof StringColumnStatistics) { - return ((StringColumnStatistics) index).getMinimum(); - } else if (index instanceof DateColumnStatistics) { - return ((DateColumnStatistics) index).getMinimum(); - } else { - return null; - } - } - } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 173fb8d..303be74 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -33,7 +33,10 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -42,13 +45,22 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare.CompareType; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -1973,7 +1985,7 @@ private static TreeReader createTreeReader(Path path, } /** - * Given a point and min and max, determine if the point is before, at the + * Given a point and min and max of same type, determine if the point is before, at the * min, in the middle, at the max, or after the range. * @param point the point to test * @param min the minimum point @@ -1998,30 +2010,78 @@ private static TreeReader createTreeReader(Path path, } /** - * Get the minimum value out of an index entry. 
- * @param index the index entry - * @return the object for the minimum value or null if there isn't one + * Given a point and min and max of different type, determine if the point is before, at the + * min, in the middle, at the max, or after the range. + * @param point the point to test + * @param min the minimum point + * @param max the maximum point + * @return the location of the point */ - static Object getMin(OrcProto.ColumnStatistics index) { - if (index.hasIntStatistics()) { - OrcProto.IntegerStatistics stat = index.getIntStatistics(); - if (stat.hasMinimum()) { - return stat.getMinimum(); - } + static Location compareToRange(Object point, Object min, Object max) { + // object inspectors + ObjectInspector pointOI = + ObjectInspectorFactory.getReflectionObjectInspector(point.getClass(), + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + ObjectInspector minOI = + ObjectInspectorFactory.getReflectionObjectInspector(min.getClass(), + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + + // type infos + TypeInfo pointTI = TypeInfoUtils.getTypeInfoFromObjectInspector(pointOI); + TypeInfo minTI = TypeInfoUtils.getTypeInfoFromObjectInspector(minOI); + + // comparison object inspector + TypeInfo compareType = FunctionRegistry.getCommonClassForComparison(pointTI, minTI); + ObjectInspector compareOI = + TypeInfoUtils + .getStandardWritableObjectInspectorFromTypeInfo((compareType == null) ? 
TypeInfoFactory.doubleTypeInfo + : compareType); + + // base converters + Converter converter0 = ObjectInspectorConverters.getConverter(pointOI, compareOI); + Converter converter1 = ObjectInspectorConverters.getConverter(minOI, compareOI); + + // converted objects + Object converted_o0 = converter0.convert(point); + if (converted_o0 == null) { + return Location.MIDDLE; + } + Object converted_o1 = converter1.convert(min); + if (converted_o1 == null) { + return Location.MIDDLE; + } + + // compare converted objects + int minCompare = ObjectInspectorUtils.compare(converted_o0, compareOI, converted_o1, compareOI); + if (minCompare < 0) { + return Location.BEFORE; + } else if (minCompare == 0) { + return Location.MIN; } - if (index.hasStringStatistics()) { - OrcProto.StringStatistics stat = index.getStringStatistics(); - if (stat.hasMinimum()) { - return stat.getMinimum(); - } + + ObjectInspector maxOI = + ObjectInspectorFactory.getReflectionObjectInspector(max.getClass(), + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + TypeInfo maxTI = TypeInfoUtils.getTypeInfoFromObjectInspector(maxOI); + // common comparison type between the point and max + compareType = FunctionRegistry.getCommonClassForComparison(pointTI, maxTI); + compareOI = + TypeInfoUtils + .getStandardWritableObjectInspectorFromTypeInfo((compareType == null) ? 
TypeInfoFactory.doubleTypeInfo + : compareType); + converter1 = ObjectInspectorConverters.getConverter(maxOI, compareOI); + converted_o1 = converter1.convert(max); + if (converted_o1 == null) { + return Location.MIDDLE; } - if (index.hasDoubleStatistics()) { - OrcProto.DoubleStatistics stat = index.getDoubleStatistics(); - if (stat.hasMinimum()) { - return stat.getMinimum(); - } + + int maxCompare = ObjectInspectorUtils.compare(converted_o0, compareOI, converted_o1, compareOI); + if (maxCompare > 0) { + return Location.AFTER; + } else if (maxCompare == 0) { + return Location.MAX; } - return null; + return Location.MIDDLE; } /** @@ -2029,26 +2089,41 @@ static Object getMin(OrcProto.ColumnStatistics index) { * @param index the index entry * @return the object for the maximum value or null if there isn't one */ - static Object getMax(OrcProto.ColumnStatistics index) { - if (index.hasIntStatistics()) { - OrcProto.IntegerStatistics stat = index.getIntStatistics(); - if (stat.hasMaximum()) { - return stat.getMaximum(); - } - } - if (index.hasStringStatistics()) { - OrcProto.StringStatistics stat = index.getStringStatistics(); - if (stat.hasMaximum()) { - return stat.getMaximum(); - } + static Object getMax(ColumnStatistics index) { + if (index instanceof IntegerColumnStatistics) { + return ((IntegerColumnStatistics) index).getMaximum(); + } else if (index instanceof DoubleColumnStatistics) { + return ((DoubleColumnStatistics) index).getMaximum(); + } else if (index instanceof StringColumnStatistics) { + return ((StringColumnStatistics) index).getMaximum(); + } else if (index instanceof DateColumnStatistics) { + return ((DateColumnStatistics) index).getMaximum(); + } else if (index instanceof DecimalColumnStatistics) { + return ((DecimalColumnStatistics) index).getMaximum(); + } else { + return null; } - if (index.hasDoubleStatistics()) { - OrcProto.DoubleStatistics stat = index.getDoubleStatistics(); - if (stat.hasMaximum()) { - return stat.getMaximum(); - } + } + + /** 
+ * Get the minimum value out of an index entry. + * @param index the index entry + * @return the object for the minimum value or null if there isn't one + */ + static Object getMin(ColumnStatistics index) { + if (index instanceof IntegerColumnStatistics) { + return ((IntegerColumnStatistics) index).getMinimum(); + } else if (index instanceof DoubleColumnStatistics) { + return ((DoubleColumnStatistics) index).getMinimum(); + } else if (index instanceof StringColumnStatistics) { + return ((StringColumnStatistics) index).getMinimum(); + } else if (index instanceof DateColumnStatistics) { + return ((DateColumnStatistics) index).getMinimum(); + } else if (index instanceof DecimalColumnStatistics) { + return ((DecimalColumnStatistics) index).getMinimum(); + } else { + return null; } - return null; } /** @@ -2061,7 +2136,9 @@ static Object getMax(OrcProto.ColumnStatistics index) { */ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index, PredicateLeaf predicate) { - Object minValue = getMin(index); + + ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index); + Object minValue = getMin(cs); // if we didn't have any values, everything must have been null if (minValue == null) { if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) { @@ -2070,25 +2147,58 @@ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index, return TruthValue.NULL; } } - Object maxValue = getMax(index); + Object maxValue = getMax(cs); return evaluatePredicateRange(predicate, minValue, maxValue); } static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValue, Object maxValue) { - Location loc; + Location loc = Location.MIDDLE; + Object predObj = predicate.getLiteral(); + CompareType compareType = CompareType.NEED_CONVERT; + + // special case: column statistics for char/varchar columns are stored + // as strings (with spaces). The base type of comparison between char/varchar + // and strings is Text. 
Converting char/varchar to Text strips off the spaces + // but converting String to Text does not strip off the space. So in case of + // char/varchar predicates we will convert min and max values to char/varchar + // type to make comparison easier (skipped when the statistics carry no min/max). + if (minValue != null && maxValue != null && predicate.getType() == PredicateLeaf.Type.CHAR) { + minValue = new HiveChar(minValue.toString(), HiveChar.MAX_CHAR_LENGTH); + maxValue = new HiveChar(maxValue.toString(), HiveChar.MAX_CHAR_LENGTH); + } + + if (minValue != null && maxValue != null && predicate.getType() == PredicateLeaf.Type.VARCHAR) { + minValue = new HiveVarchar(minValue.toString(), HiveVarchar.MAX_VARCHAR_LENGTH); + maxValue = new HiveVarchar(maxValue.toString(), HiveVarchar.MAX_VARCHAR_LENGTH); + } + + if (predObj != null && minValue != null) { + compareType = getCompareType(predObj, minValue); + } else { + if (predObj != null && maxValue != null) { + compareType = getCompareType(predObj, maxValue); + } + } + switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: - loc = compareToRange((Comparable) predicate.getLiteral(), - minValue, maxValue); + if(compareType.equals(CompareType.NEED_CONVERT)) { + loc = compareToRange(predObj, minValue, maxValue); + } else { + loc = compareToRange((Comparable) predObj, minValue, maxValue); + } if (loc == Location.BEFORE || loc == Location.AFTER) { return TruthValue.NO; } else { return TruthValue.YES_NO; } case EQUALS: - loc = compareToRange((Comparable) predicate.getLiteral(), - minValue, maxValue); + if(compareType.equals(CompareType.NEED_CONVERT)) { + loc = compareToRange(predObj, minValue, maxValue); + } else { + loc = compareToRange((Comparable) predObj, minValue, maxValue); + } if (minValue.equals(maxValue) && loc == Location.MIN) { return TruthValue.YES_NULL; } else if (loc == Location.BEFORE || loc == Location.AFTER) { @@ -2097,8 +2207,11 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValu 
return TruthValue.YES_NO_NULL; } case LESS_THAN: - loc = compareToRange((Comparable) predicate.getLiteral(), - 
minValue, maxValue); + if(compareType.equals(CompareType.NEED_CONVERT)) { + loc = compareToRange(predObj, minValue, maxValue); + } else { + loc = compareToRange((Comparable) predObj, minValue, maxValue); + } if (loc == Location.AFTER) { return TruthValue.YES_NULL; } else if (loc == Location.BEFORE || loc == Location.MIN) { @@ -2107,8 +2220,11 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValu return TruthValue.YES_NO_NULL; } case LESS_THAN_EQUALS: - loc = compareToRange((Comparable) predicate.getLiteral(), - minValue, maxValue); + if(compareType.equals(CompareType.NEED_CONVERT)) { + loc = compareToRange(predObj, minValue, maxValue); + } else { + loc = compareToRange((Comparable) predObj, minValue, maxValue); + } if (loc == Location.AFTER || loc == Location.MAX) { return TruthValue.YES_NULL; } else if (loc == Location.BEFORE) { @@ -2121,7 +2237,11 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValu // for a single value, look through to see if that value is in the // set for(Object arg: predicate.getLiteralList()) { - loc = compareToRange((Comparable) arg, minValue, maxValue); + if(compareType.equals(CompareType.NEED_CONVERT)) { + loc = compareToRange(arg, minValue, maxValue); + } else { + loc = compareToRange((Comparable) arg, minValue, maxValue); + } if (loc == Location.MIN) { return TruthValue.YES_NULL; } @@ -2130,7 +2250,11 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValu } else { // are all of the values outside of the range? 
for(Object arg: predicate.getLiteralList()) { - loc = compareToRange((Comparable) arg, minValue, maxValue); + if(compareType.equals(CompareType.NEED_CONVERT)) { + loc = compareToRange(arg, minValue, maxValue); + } else { + loc = compareToRange((Comparable) arg, minValue, maxValue); + } if (loc == Location.MIN || loc == Location.MIDDLE || loc == Location.MAX) { return TruthValue.YES_NO_NULL; @@ -2140,10 +2264,20 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValu } case BETWEEN: List args = predicate.getLiteralList(); - loc = compareToRange((Comparable) args.get(0), minValue, maxValue); + if(compareType.equals(CompareType.NEED_CONVERT)) { + loc = compareToRange(args.get(0), minValue, maxValue); + } else { + loc = compareToRange((Comparable) args.get(0), minValue, maxValue); + } if (loc == Location.BEFORE || loc == Location.MIN) { - Location loc2 = compareToRange((Comparable) args.get(1), minValue, - maxValue); + Location loc2; + if(compareType.equals(CompareType.NEED_CONVERT)) { + loc2 = compareToRange(args.get(1), minValue, + maxValue); + } else { + loc2 = compareToRange((Comparable) args.get(1), minValue, + maxValue); + } if (loc2 == Location.AFTER || loc2 == Location.MAX) { return TruthValue.YES_NULL; } else if (loc2 == Location.BEFORE) { @@ -2163,6 +2297,24 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValu } } + private static CompareType getCompareType(Object o1, Object o2) { + ObjectInspector oi1 = ObjectInspectorFactory.getReflectionObjectInspector + (o1.getClass(), + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + ObjectInspector oi2 = ObjectInspectorFactory.getReflectionObjectInspector + (o2.getClass(), + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + TypeInfo oiTypeInfo0 = TypeInfoUtils.getTypeInfoFromObjectInspector(oi1); + TypeInfo oiTypeInfo1 = TypeInfoUtils.getTypeInfoFromObjectInspector(oi2); + CompareType compareType = CompareType.NEED_CONVERT; + + if (oiTypeInfo0 == 
oiTypeInfo1 + || TypeInfoUtils.doPrimitiveCategoriesMatch(oiTypeInfo0, oiTypeInfo1)) { + return CompareType.SAME_TYPE; + } + return compareType; + } + /** * Pick the row groups that we need to load from the current stripe. * @return an array with a boolean for each row group or null if all of the diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java index 490e92c..922b99f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java @@ -45,7 +45,11 @@ public static enum Type { INTEGER, // all of the integer types FLOAT, // float and double - STRING + STRING, + DATE, + DECIMAL, + CHAR, + VARCHAR } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index 3b918b0..a443110 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -283,11 +283,19 @@ Operator getOperator() { case INT: case LONG: return PredicateLeaf.Type.INTEGER; + case CHAR: + return PredicateLeaf.Type.CHAR; + case VARCHAR: + return PredicateLeaf.Type.VARCHAR; case STRING: return PredicateLeaf.Type.STRING; case FLOAT: case DOUBLE: return PredicateLeaf.Type.FLOAT; + case DATE: + return PredicateLeaf.Type.DATE; + case DECIMAL: + return PredicateLeaf.Type.DECIMAL; default: } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java index 1762073..b699b63 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java @@ -18,21 +18,22 @@ package org.apache.hadoop.hive.ql.io.orc; -import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; -import 
org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.ql.io.sarg.TestSearchArgumentImpl; -import org.junit.Test; - -import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.Location; - -import java.util.ArrayList; -import java.util.List; - import static junit.framework.Assert.assertEquals; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; +import java.sql.Date; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.Location; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; +import org.apache.hadoop.hive.ql.io.sarg.TestSearchArgumentImpl; +import org.junit.Test; + public class TestRecordReaderImpl { @Test @@ -76,25 +77,118 @@ public void testCompareToRangeString() throws Exception { } @Test + public void testCompareToDecimalNeedConvert() throws Exception { + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange(25, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange(50, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange(30, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.MIDDLE, + RecordReaderImpl.compareToRange(35, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.MAX, + RecordReaderImpl.compareToRange(40, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange(25, HiveDecimal.create(30), HiveDecimal.create(30))); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange(30, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange(35, HiveDecimal.create(30), 
HiveDecimal.create(30))); + + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange(25.0, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange(50.0, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange(30.0, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.MIDDLE, + RecordReaderImpl.compareToRange(35.0, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.MAX, + RecordReaderImpl.compareToRange(40.0, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange(25.0, HiveDecimal.create(30), HiveDecimal.create(30))); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange(30.0, HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange(35.0, HiveDecimal.create(30), HiveDecimal.create(30))); + + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange("25", HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange("50", HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange("30", HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.MIDDLE, + RecordReaderImpl.compareToRange("35", HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.MAX, + RecordReaderImpl.compareToRange("40", HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange("25", HiveDecimal.create(30), HiveDecimal.create(30))); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange("30", HiveDecimal.create(30), HiveDecimal.create(40))); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange("35", HiveDecimal.create(30), HiveDecimal.create(30))); + } + + @Test + public void 
testCompareToDateNeedConvert() throws Exception { + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange("1970-01-01", Date.valueOf("1970-01-02"), Date.valueOf("1970-01-04"))); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange("1970-01-05", Date.valueOf("1970-01-02"), Date.valueOf("1970-01-04"))); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange("1970-01-02", Date.valueOf("1970-01-02"), Date.valueOf("1970-01-04"))); + assertEquals(Location.MIDDLE, + RecordReaderImpl.compareToRange("1970-01-03", Date.valueOf("1970-01-02"), Date.valueOf("1970-01-04"))); + assertEquals(Location.MAX, + RecordReaderImpl.compareToRange("1970-01-04", Date.valueOf("1970-01-02"), Date.valueOf("1970-01-04"))); + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange("1970-01-01", Date.valueOf("1970-01-02"), Date.valueOf("1970-01-02"))); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange("1970-01-02", Date.valueOf("1970-01-02"), Date.valueOf("1970-01-02"))); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange("1970-01-03", Date.valueOf("1970-01-02"), Date.valueOf("1970-01-02"))); + } + + @Test + public void testCompareToCharNeedConvert() throws Exception { + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange("apple", "hello", "world")); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange("zombie", "hello", "world")); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange("hello", "hello", "world")); + assertEquals(Location.MIDDLE, + RecordReaderImpl.compareToRange("pilot", "hello", "world")); + assertEquals(Location.MAX, + RecordReaderImpl.compareToRange("world", "hello", "world")); + assertEquals(Location.BEFORE, + RecordReaderImpl.compareToRange("apple", "hello", "hello")); + assertEquals(Location.MIN, + RecordReaderImpl.compareToRange("hello", "hello", "hello")); + assertEquals(Location.AFTER, + RecordReaderImpl.compareToRange("zombie", "hello", "hello")); + } + + @Test public void 
testGetMin() throws Exception { - assertEquals(null, RecordReaderImpl.getMin(createIntStats(null, null))); - assertEquals(10L, RecordReaderImpl.getMin(createIntStats(10L, 100L))); - assertEquals(null, RecordReaderImpl.getMin( - OrcProto.ColumnStatistics.newBuilder() - .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder().build()) - .build())); - assertEquals(10.0d, RecordReaderImpl.getMin( + assertEquals(10L, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L)))); + assertEquals(10.0d, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder() - .setMinimum(10.0d).setMaximum(100.0d).build()).build())); - assertEquals(null, RecordReaderImpl.getMin( + .setMinimum(10.0d).setMaximum(100.0d).build()).build()))); + assertEquals(null, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setStringStatistics(OrcProto.StringStatistics.newBuilder().build()) - .build())); - assertEquals("a", RecordReaderImpl.getMin( + .build()))); + assertEquals("a", RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setStringStatistics(OrcProto.StringStatistics.newBuilder() - .setMinimum("a").setMaximum("b").build()).build())); + .setMinimum("a").setMaximum("b").build()).build()))); + assertEquals("hello", RecordReaderImpl.getMin(ColumnStatisticsImpl + .deserialize(createStringStats("hello", "world")))); + assertEquals(HiveDecimal.create("111.1"), RecordReaderImpl.getMin(ColumnStatisticsImpl + .deserialize(createDecimalStats("111.1", "112.1")))); } private static OrcProto.ColumnStatistics createIntStats(Long min, @@ -111,26 +205,39 @@ public void testGetMin() throws Exception { .setIntStatistics(intStats.build()).build(); } + private static OrcProto.ColumnStatistics createStringStats(String min, String max) { + OrcProto.StringStatistics.Builder strStats = 
OrcProto.StringStatistics.newBuilder(); + strStats.setMinimum(min); + strStats.setMaximum(max); + return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build()).build(); + } + + private static OrcProto.ColumnStatistics createDecimalStats(String min, String max) { + OrcProto.DecimalStatistics.Builder decStats = OrcProto.DecimalStatistics.newBuilder(); + decStats.setMinimum(min); + decStats.setMaximum(max); + return OrcProto.ColumnStatistics.newBuilder().setDecimalStatistics(decStats.build()).build(); + } + @Test public void testGetMax() throws Exception { - assertEquals(null, RecordReaderImpl.getMax(createIntStats(null, null))); - assertEquals(100L, RecordReaderImpl.getMax(createIntStats(10L, 100L))); - assertEquals(null, RecordReaderImpl.getMax( - OrcProto.ColumnStatistics.newBuilder() - .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder().build()) - .build())); - assertEquals(100.0d, RecordReaderImpl.getMax( + assertEquals(100L, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L)))); + assertEquals(100.0d, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder() - .setMinimum(10.0d).setMaximum(100.0d).build()).build())); - assertEquals(null, RecordReaderImpl.getMax( + .setMinimum(10.0d).setMaximum(100.0d).build()).build()))); + assertEquals(null, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setStringStatistics(OrcProto.StringStatistics.newBuilder().build()) - .build())); - assertEquals("b", RecordReaderImpl.getMax( + .build()))); + assertEquals("b", RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setStringStatistics(OrcProto.StringStatistics.newBuilder() - .setMinimum("a").setMaximum("b").build()).build())); + .setMinimum("a").setMaximum("b").build()).build()))); + assertEquals("world", 
RecordReaderImpl.getMax(ColumnStatisticsImpl + .deserialize(createStringStats("hello", "world")))); + assertEquals(HiveDecimal.create("112.1"), RecordReaderImpl.getMax(ColumnStatisticsImpl + .deserialize(createDecimalStats("111.1", "112.1")))); } @Test @@ -150,6 +257,37 @@ public void testEquals() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred)); assertEquals(TruthValue.YES_NULL, RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.EQUALS, + PredicateLeaf.Type.CHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.EQUALS, + PredicateLeaf.Type.VARCHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + 
assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + } @Test @@ -169,6 +307,36 @@ public void testNullSafeEquals() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred)); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.CHAR, "x", "hello", null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicate(createStringStats("world", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("hello", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "hello"), pred)); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "banana"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("hello", "hello"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.VARCHAR, "x", "hello", null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicate(createStringStats("world", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("hello", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "zombie"), pred)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "hello"), pred)); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicate(createStringStats("apple", "banana"), pred)); + assertEquals(TruthValue.YES_NO, + 
RecordReaderImpl.evaluatePredicate(createStringStats("hello", "hello"), pred)); } @Test @@ -186,6 +354,36 @@ public void testLessThan() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), lessThan)); assertEquals(TruthValue.YES_NULL, RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), lessThan)); + + PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.CHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN, + PredicateLeaf.Type.VARCHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); } @Test @@ -203,6 +401,36 @@ public void 
testLessThanEquals() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred)); assertEquals(TruthValue.YES_NULL, RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, + PredicateLeaf.Type.CHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, + PredicateLeaf.Type.VARCHAR, "x", "b", null); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); } @Test @@ -221,6 +449,39 @@ public void testIn() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred)); assertEquals(TruthValue.NO_NULL, 
RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred)); + + args.clear(); + args.add("a"); + args.add("b"); + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.CHAR, "x", null, args); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.VARCHAR, "x", null, args); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); } @Test @@ -245,6 +506,39 @@ public void testBetween() throws Exception { RecordReaderImpl.evaluatePredicate(createIntStats(10L, 20L), pred)); assertEquals(TruthValue.YES_NULL, RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred)); + + args.clear(); + args.add("a"); + 
args.add("b"); + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.BETWEEN, + PredicateLeaf.Type.CHAR, "x", null, args); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.BETWEEN, + PredicateLeaf.Type.VARCHAR, "x", null, args); + assertEquals(TruthValue.NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("c", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "d"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "d"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "b"), pred)); + assertEquals(TruthValue.YES_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("a", "a"), pred)); + assertEquals(TruthValue.YES_NO_NULL, + RecordReaderImpl.evaluatePredicate(createStringStats("b", "b"), pred)); } @Test diff --git ql/src/test/queries/clientpositive/orc_ppd_char.q ql/src/test/queries/clientpositive/orc_ppd_char.q new file mode 100644 index 0000000..1f5f54a --- /dev/null +++ ql/src/test/queries/clientpositive/orc_ppd_char.q @@ -0,0 +1,76 @@ +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=5000; + 
+create table newtypesorc(c char(10), v varchar(10), d decimal(5,3), da date) stored as orc tblproperties("orc.stripe.size"="16777216"); + +insert overwrite table newtypesorc select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl; + +set hive.optimize.index.filter=false; + +-- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) +select sum(hash(*)) from newtypesorc where c="apple"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c="apple"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c!="apple"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c!="apple"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c<"hello"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c<"hello"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c<="hello"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c<="hello"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c="apple "; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c="apple "; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c in ("apple", "carrot"); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c in ("apple", "carrot"); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c in ("apple", "hello"); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c in ("apple", "hello"); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from 
newtypesorc where c in ("carrot"); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c in ("carrot"); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c between "apple" and "carrot"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c between "apple" and "carrot"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c between "apple" and "zombie"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c between "apple" and "zombie"; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where c between "carrot" and "carrot1"; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where c between "carrot" and "carrot1"; + diff --git ql/src/test/queries/clientpositive/orc_ppd_date.q ql/src/test/queries/clientpositive/orc_ppd_date.q new file mode 100644 index 0000000..c34be86 --- /dev/null +++ ql/src/test/queries/clientpositive/orc_ppd_date.q @@ -0,0 +1,97 @@ +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=5000; + +create table newtypesorc(c char(10), v varchar(10), d decimal(5,3), da date) stored as orc tblproperties("orc.stripe.size"="16777216"); + +insert overwrite table newtypesorc select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl; + +-- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) +select sum(hash(*)) from newtypesorc where da='1970-02-20'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da='1970-02-20'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where 
da=cast('1970-02-20' as date); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da=cast('1970-02-20' as date); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da=cast('1970-02-20' as varchar(20)); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da=cast('1970-02-20' as varchar(20)); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da!='1970-02-20'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da!='1970-02-20'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<'1970-02-27'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<'1970-02-27'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<'1970-02-29'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<'1970-02-29'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<'1970-02-15'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<'1970-02-15'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<='1970-02-20'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<='1970-02-20'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da<='1970-02-27'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da<='1970-02-27'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-21' as date), cast('1970-02-27' as date)); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-21' as date), cast('1970-02-27' as date)); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da 
in (cast('1970-02-20' as date), cast('1970-02-27' as date)); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-20' as date), cast('1970-02-27' as date)); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-21' as date), cast('1970-02-22' as date)); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da in (cast('1970-02-21' as date), cast('1970-02-22' as date)); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da between '1970-02-19' and '1970-02-22'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da between '1970-02-19' and '1970-02-22'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da between '1970-02-19' and '1970-02-28'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da between '1970-02-19' and '1970-02-28'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where da between '1970-02-18' and '1970-02-19'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where da between '1970-02-18' and '1970-02-19'; diff --git ql/src/test/queries/clientpositive/orc_ppd_decimal.q ql/src/test/queries/clientpositive/orc_ppd_decimal.q new file mode 100644 index 0000000..a93590e --- /dev/null +++ ql/src/test/queries/clientpositive/orc_ppd_decimal.q @@ -0,0 +1,151 @@ +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=5000; + +create table newtypesorc(c char(10), v varchar(10), d decimal(5,3), da date) stored as orc tblproperties("orc.stripe.size"="16777216"); + +insert overwrite table newtypesorc select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), 
cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl; + +-- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) +select sum(hash(*)) from newtypesorc where d=0.22; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d=0.22; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d='0.22'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d='0.22'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d=cast('0.22' as float); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d=cast('0.22' as float); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d!=0.22; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d!=0.22; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d!='0.22'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d!='0.22'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d!=cast('0.22' as float); + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d!=cast('0.22' as float); + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d<11.22; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d<11.22; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d<'11.22'; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where d<'11.22'; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where d