files, Configuration conf,
buf.append("no stats at ");
} else {
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
- buf.append(cs.toString());
+ Object min = RecordReaderImpl.getMin(cs), max = RecordReaderImpl.getMax(cs);
+ buf.append(" count: ").append(cs.getNumberOfValues());
+ buf.append(" min: ").append(min);
+ buf.append(" max: ").append(max);
}
buf.append(" positions: ");
for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 9e6c06d..5be2b4f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,9 +18,18 @@
package org.apache.hadoop.hive.ql.io.orc;
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.NavigableMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -45,9 +54,9 @@
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -65,18 +74,9 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.TreeMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* A MapReduce/Hive input format for ORC files.
*
@@ -919,8 +919,13 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
if (filterColumns[pred] != -1) {
// column statistics at index 0 contains only the number of rows
- ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
- truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred));
+ ColumnStatistics stats =
+ stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+ Object minValue = RecordReaderImpl.getMin(stats);
+ Object maxValue = RecordReaderImpl.getMax(stats);
+ truthValues[pred] =
+ RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred),
+ minValue, maxValue);
} else {
// parition column case.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index fcd9b26..f7fce3f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -2356,36 +2356,21 @@ static Object getMin(ColumnStatistics index) {
/**
* Evaluate a predicate with respect to the statistics from the column
* that is referenced in the predicate.
- * @param statsProto the statistics for the column mentioned in the predicate
+ * @param index the statistics for the column mentioned in the predicate
* @param predicate the leaf predicate we need to evaluation
* @return the set of truth values that may be returned for the given
* predicate.
*/
- static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
+ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index,
PredicateLeaf predicate) {
- ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
+ ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index);
Object minValue = getMin(cs);
Object maxValue = getMax(cs);
- return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull());
- }
-
- /**
- * Evaluate a predicate with respect to the statistics from the column
- * that is referenced in the predicate.
- * @param stats the statistics for the column mentioned in the predicate
- * @param predicate the leaf predicate we need to evaluation
- * @return the set of truth values that may be returned for the given
- * predicate.
- */
- static TruthValue evaluatePredicate(ColumnStatistics stats,
- PredicateLeaf predicate) {
- Object minValue = getMin(stats);
- Object maxValue = getMax(stats);
- return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull());
+ return evaluatePredicateRange(predicate, minValue, maxValue);
}
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
- Object max, boolean hasNull) {
+ Object max) {
// if we didn't have any values, everything must have been null
if (min == null) {
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2420,29 +2405,29 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
case EQUALS:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (minValue.equals(maxValue) && loc == Location.MIN) {
- return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+ return TruthValue.YES_NULL;
} else if (loc == Location.BEFORE || loc == Location.AFTER) {
- return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+ return TruthValue.NO_NULL;
} else {
- return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ return TruthValue.YES_NO_NULL;
}
case LESS_THAN:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.AFTER) {
- return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+ return TruthValue.YES_NULL;
} else if (loc == Location.BEFORE || loc == Location.MIN) {
- return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+ return TruthValue.NO_NULL;
} else {
- return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ return TruthValue.YES_NO_NULL;
}
case LESS_THAN_EQUALS:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.AFTER || loc == Location.MAX) {
- return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+ return TruthValue.YES_NULL;
} else if (loc == Location.BEFORE) {
- return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+ return TruthValue.NO_NULL;
} else {
- return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ return TruthValue.YES_NO_NULL;
}
case IN:
if (minValue.equals(maxValue)) {
@@ -2452,10 +2437,10 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
predObj = getBaseObjectForComparison(arg, minValue);
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN) {
- return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+ return TruthValue.YES_NULL;
}
}
- return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+ return TruthValue.NO_NULL;
} else {
// are all of the values outside of the range?
for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
@@ -2463,10 +2448,10 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN || loc == Location.MIDDLE ||
loc == Location.MAX) {
- return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ return TruthValue.YES_NO_NULL;
}
}
- return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+ return TruthValue.NO_NULL;
}
case BETWEEN:
List