files, Configuration conf,
buf.append("no stats at ");
} else {
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
- Object min = RecordReaderImpl.getMin(cs), max = RecordReaderImpl.getMax(cs);
- buf.append(" count: ").append(cs.getNumberOfValues());
- buf.append(" min: ").append(min);
- buf.append(" max: ").append(max);
+ buf.append(cs.toString());
}
buf.append(" positions: ");
for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 5be2b4f..9e6c06d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,18 +18,9 @@
package org.apache.hadoop.hive.ql.io.orc;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.NavigableMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -54,9 +45,9 @@
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -74,9 +65,18 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
/**
* A MapReduce/Hive input format for ORC files.
*
@@ -919,13 +919,8 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
if (filterColumns[pred] != -1) {
// column statistics at index 0 contains only the number of rows
- ColumnStatistics stats =
- stripeStatistics.getColumnStatistics()[filterColumns[pred]];
- Object minValue = RecordReaderImpl.getMin(stats);
- Object maxValue = RecordReaderImpl.getMax(stats);
- truthValues[pred] =
- RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred),
- minValue, maxValue);
+ ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+ truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred));
} else {
// parition column case.
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index f7fce3f..fcd9b26 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -2356,21 +2356,36 @@ static Object getMin(ColumnStatistics index) {
/**
* Evaluate a predicate with respect to the statistics from the column
* that is referenced in the predicate.
- * @param index the statistics for the column mentioned in the predicate
+ * @param statsProto the statistics for the column mentioned in the predicate
* @param predicate the leaf predicate we need to evaluation
* @return the set of truth values that may be returned for the given
* predicate.
*/
- static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index,
+ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
PredicateLeaf predicate) {
- ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index);
+ ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
Object minValue = getMin(cs);
Object maxValue = getMax(cs);
- return evaluatePredicateRange(predicate, minValue, maxValue);
+ return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull());
+ }
+
+ /**
+ * Evaluate a predicate with respect to the statistics from the column
+ * that is referenced in the predicate.
+ * @param stats the statistics for the column mentioned in the predicate
+ * @param predicate the leaf predicate we need to evaluate
+ * @return the set of truth values that may be returned for the given
+ * predicate.
+ */
+ static TruthValue evaluatePredicate(ColumnStatistics stats,
+ PredicateLeaf predicate) {
+ Object minValue = getMin(stats);
+ Object maxValue = getMax(stats);
+ return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull());
}
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
- Object max) {
+ Object max, boolean hasNull) {
// if we didn't have any values, everything must have been null
if (min == null) {
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2405,29 +2420,29 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
case EQUALS:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (minValue.equals(maxValue) && loc == Location.MIN) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE || loc == Location.AFTER) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case LESS_THAN:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.AFTER) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE || loc == Location.MIN) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case LESS_THAN_EQUALS:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.AFTER || loc == Location.MAX) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case IN:
if (minValue.equals(maxValue)) {
@@ -2437,10 +2452,10 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
predObj = getBaseObjectForComparison(arg, minValue);
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
}
}
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
// are all of the values outside of the range?
for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
@@ -2448,10 +2463,10 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN || loc == Location.MIDDLE ||
loc == Location.MAX) {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
}
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
}
case BETWEEN:
List