files, Configuration conf,
buf.append("no stats at ");
} else {
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
- Object min = RecordReaderImpl.getMin(cs), max = RecordReaderImpl.getMax(cs);
- buf.append(" count: ").append(cs.getNumberOfValues());
- buf.append(" min: ").append(min);
- buf.append(" max: ").append(max);
+ buf.append(cs.toString());
}
buf.append(" positions: ");
for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index b46937c..0281c86 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -18,7 +18,14 @@
package org.apache.hadoop.hive.ql.io.orc;
-import java.io.IOException;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_BLOOM_FILTER_COLUMNS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -26,7 +33,7 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.*;
+import java.io.IOException;
/**
* Contains factory methods to read or write ORC files.
@@ -148,7 +155,8 @@ private WriterVersion(int id) {
ROW_INDEX_STRIDE("orc.row.index.stride"),
ENABLE_INDEXES("orc.create.index"),
BLOCK_PADDING("orc.block.padding"),
- ENCODING_STRATEGY("orc.encoding.strategy");
+ ENCODING_STRATEGY("orc.encoding.strategy"),
+ BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns");
private final String propName;
@@ -256,6 +264,8 @@ public static Reader createReader(Path path,
private EncodingStrategy encodingStrategy;
private CompressionStrategy compressionStrategy;
private float paddingTolerance;
+ private String bloomFilterColumns;
+ private double bloomFilterFpp;
WriterOptions(Configuration conf) {
configuration = conf;
@@ -288,9 +298,12 @@ public static Reader createReader(Path path,
compressionStrategy = CompressionStrategy.valueOf(compString);
}
- paddingTolerance =
- conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname,
- HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal);
+ paddingTolerance = conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname,
+ HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal);
+
+ bloomFilterColumns = HiveConf.getVar(conf, HIVE_ORC_BLOOM_FILTER_COLUMNS);
+ bloomFilterFpp = conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOOM_FILTER_FPP.varname,
+ HiveConf.ConfVars.HIVE_ORC_BLOOM_FILTER_FPP.defaultFloatVal);
}
/**
@@ -367,6 +380,14 @@ public WriterOptions paddingTolerance(float value) {
}
/**
+ * Comma-separated list of column names for which bloom filters should be created.
+ */
+ public WriterOptions bloomFilterColumns(String columns) {
+ bloomFilterColumns = columns;
+ return this;
+ }
+
+ /**
* Sets the generic compression that is used to compress the data.
*/
public WriterOptions compress(CompressionKind value) {
@@ -438,8 +459,8 @@ public static Writer createWriter(Path path,
opts.memoryManagerValue, opts.blockPaddingValue,
opts.versionValue, opts.callback,
opts.encodingStrategy, opts.compressionStrategy,
- opts.paddingTolerance,
- opts.blockSizeValue);
+ opts.paddingTolerance, opts.blockSizeValue,
+ opts.bloomFilterColumns, opts.bloomFilterFpp);
}
/**
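
Editor's note: for context, the new option slots into the existing fluent WriterOptions builder. Below is a minimal usage sketch, not part of the patch; the file path and column names are hypothetical, and the ObjectInspector is assumed to be built elsewhere:

```java
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

// Sketch (assumes the org.apache.hadoop.hive.ql.io.orc package is on the
// classpath): open an ORC writer that builds bloom filters for two columns.
// bloomFilterColumns(...) is the builder method added by this patch; if it is
// never called, the value falls back to HIVE_ORC_BLOOM_FILTER_COLUMNS in conf.
static Writer openWriterWithBloomFilters(Configuration conf,
                                         ObjectInspector rowInspector)
    throws IOException {
  return OrcFile.createWriter(new Path("/tmp/example.orc"),
      OrcFile.writerOptions(conf)
          .inspector(rowInspector)
          .bloomFilterColumns("name,id")); // comma-separated column names
}
```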
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 5be2b4f..b7841d3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,18 +18,9 @@
package org.apache.hadoop.hive.ql.io.orc;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.NavigableMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -54,9 +45,9 @@
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -74,9 +65,18 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
/**
* A MapReduce/Hive input format for ORC files.
*
@@ -923,9 +923,8 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
stripeStatistics.getColumnStatistics()[filterColumns[pred]];
Object minValue = RecordReaderImpl.getMin(stats);
Object maxValue = RecordReaderImpl.getMax(stats);
- truthValues[pred] =
- RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred),
- minValue, maxValue);
+ truthValues[pred] = RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred),
+ minValue, maxValue, stats.getBloomFilter());
} else {
// partition column case.
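
Editor's note: with the extra bloomFilter argument, stripe elimination can go beyond min/max range checks. An illustrative sketch (the statistics, predicate, and variable names are hypothetical, not from the patch):

```java
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;

// Hypothetical stripe statistics: column "id" has min=1, max=100, and the
// predicate is "id = 42".
// - Range check alone: 42 lies inside [1, 100], so the stripe *might* match
//   and must be read (TruthValue.YES_NO_NULL).
// - With a bloom filter: a negative membership test proves 42 is absent, so
//   the stripe can be skipped (TruthValue.NO_NULL). A positive test still only
//   means "maybe": bloom filters allow false positives, never false negatives.
TruthValue tv = RecordReaderImpl.evaluatePredicateRange(
    predLeaf, 1L, 100L, stats.getBloomFilter()); // predLeaf encodes "id = 42"
```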
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index 5bd3f0c..d76e25f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -170,6 +170,11 @@ private String getSettingFromPropsFallingBackToConf(String key, Properties props
options.encodingStrategy(EncodingStrategy.valueOf(propVal));
}
+ if ((propVal = getSettingFromPropsFallingBackToConf(
+ OrcFile.OrcTableProperties.BLOOM_FILTER_COLUMNS.getPropName(), props, conf)) != null) {
+ options.bloomFilterColumns(propVal);
+ }
+
return options;
}
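
Editor's note: the lookup above means the option can be driven per table. A hedged sketch of the property side (the key string is the one registered for BLOOM_FILTER_COLUMNS earlier in this patch; the enclosing options method is only partially shown in this hunk):

```java
import java.util.Properties;

// Sketch: table properties that the fallback lookup above would resolve.
// "orc.bloom.filter.columns" is the key added to OrcTableProperties by this
// patch; if it is absent from the table's Properties, the same key is looked
// up in the job Configuration instead.
Properties tableProps = new Properties();
tableProps.setProperty(
    OrcFile.OrcTableProperties.BLOOM_FILTER_COLUMNS.getPropName(),
    "name,id"); // bloom filters for these two columns
```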
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
new file mode 100644
index 0000000..c80e50f
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import com.google.common.collect.Lists;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class OrcUtils {
+ private static final Log LOG = LogFactory.getLog(OrcUtils.class);
+
+ /**
+ * Returns the selected columns as a boolean array, with true set for each specified column name.
+ * The result contains a number of elements equal to the flattened column count.
+ * For example:
+ * selectedColumns - a,b,c
+ * allColumns - a,b,c,d
+ * If column c is a complex type, say list<string>, and the other types are primitives, then the
+ * result will be [false, true, true, true, true, false].
+ * Index 0 is the root element of the struct, which is set to false by default; indexes 1 and 2
+ * correspond to columns a and b. Indexes 3 and 4 correspond to column c, which is a list<string>,
+ * and index 5 corresponds to column d. After flattening, list<string> occupies 2 columns.
+ *
+ * @param selectedColumns - comma separated list of selected column names
+ * @param allColumns - comma separated list of all column names
+ * @param inspector - object inspector
+ * @return - boolean array with true value set for the specified column names
+ */
+ public static boolean[] includeColumns(String selectedColumns, String allColumns,
+ ObjectInspector inspector) {
+ int numFlattenedCols = getFlattenedColumnsCount(inspector);
+ boolean[] results = new boolean[numFlattenedCols];
+ if (selectedColumns != null && !selectedColumns.isEmpty()) {
+ includeColumnsImpl(results, selectedColumns, allColumns, inspector);
+ }
+ return results;
+ }
+
+ private static void includeColumnsImpl(boolean[] includeColumns, String selectedColumns,
+ String allColumns,
+ ObjectInspector inspector) {
+ Map<String, List<Integer>> columnSpanMap = getColumnSpan(allColumns, inspector);
+ LOG.info("columnSpanMap: " + columnSpanMap);
+
+ String[] selCols = selectedColumns.split(",");
+ for (String sc : selCols) {
+ if (columnSpanMap.containsKey(sc)) {
+ List<Integer> colSpan = columnSpanMap.get(sc);
+ int start = colSpan.get(0);
+ int end = colSpan.get(1);
+ for (int i = start; i <= end; i++) {
+ includeColumns[i] = true;
+ }
+ }
+ }
+
+ LOG.info("includeColumns: " + Arrays.asList(includeColumns));
+ }
+
+ private static Map<String, List<Integer>> getColumnSpan(String allColumns,
+ ObjectInspector inspector) {
+ // map that contains the column span for each column. Column span is the number of columns
+ // required after flattening. For a given object inspector this map contains the start column
+ // id and end column id (both inclusive) after flattening.
+ // EXAMPLE:
+ // schema: struct<a:int,b:float,c:map<string,int>>
+ // column span map for the above struct will be
+ // a => [1,1], b => [2,2], c => [3,5]
+ Map<String, List<Integer>> columnSpanMap = new HashMap<String, List<Integer>>();
+ if (allColumns != null) {
+ String[] columns = allColumns.split(",");
+ int startIdx = 0;
+ int endIdx = 0;
+ if (inspector instanceof StructObjectInspector) {
+ StructObjectInspector soi = (StructObjectInspector) inspector;
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ for (int i = 0; i < fields.size(); i++) {
+ StructField sf = fields.get(i);
+
+ // we get the type (category) from the object inspector but the column name from the argument.
+ // The reason for this is that Hive (FileSinkOperator) does not pass the actual column names;
+ // instead it passes the internal column names (_col1, _col2).
+ ObjectInspector sfOI = sf.getFieldObjectInspector();
+ String colName = columns[i];
+
+ startIdx = endIdx + 1;
+ switch (sfOI.getCategory()) {
+ case PRIMITIVE:
+ endIdx += 1;
+ break;
+ case STRUCT:
+ endIdx += 1;
+ StructObjectInspector structInsp = (StructObjectInspector) sfOI;
+ List<? extends StructField> structFields = structInsp.getAllStructFieldRefs();
+ for (int j = 0; j < structFields.size(); ++j) {
+ endIdx += getFlattenedColumnsCount(structFields.get(j).getFieldObjectInspector());
+ }
+ break;
+ case MAP:
+ endIdx += 1;
+ MapObjectInspector mapInsp = (MapObjectInspector) sfOI;
+ endIdx += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
+ endIdx += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
+ break;
+ case LIST:
+ endIdx += 1;
+ ListObjectInspector listInsp = (ListObjectInspector) sfOI;
+ endIdx += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
+ break;
+ case UNION:
+ endIdx += 1;
+ UnionObjectInspector unionInsp = (UnionObjectInspector) sfOI;
+ List<ObjectInspector> choices = unionInsp.getObjectInspectors();
+ for (int j = 0; j < choices.size(); ++j) {
+ endIdx += getFlattenedColumnsCount(choices.get(j));
+ }
+ break;
+ default:
+ throw new IllegalArgumentException("Bad category: " +
+ sfOI.getCategory());
+ }
+
+ columnSpanMap.put(colName, Lists.newArrayList(startIdx, endIdx));
+ }
+ }
+ }
+ return columnSpanMap;
+ }
+
+ /**
+ * Returns the number of columns after flattening complex types.
+ *
+ * @param inspector - object inspector
+ * @return - number of columns after flattening
+ */
+ public static int getFlattenedColumnsCount(ObjectInspector inspector) {
+ int numWriters = 0;
+ switch (inspector.getCategory()) {
+ case PRIMITIVE:
+ numWriters += 1;
+ break;
+ case STRUCT:
+ numWriters += 1;
+ StructObjectInspector structInsp = (StructObjectInspector) inspector;
+ List<? extends StructField> fields = structInsp.getAllStructFieldRefs();
+ for (int i = 0; i < fields.size(); ++i) {
+ numWriters += getFlattenedColumnsCount(fields.get(i).getFieldObjectInspector());
+ }
+ break;
+ case MAP:
+ numWriters += 1;
+ MapObjectInspector mapInsp = (MapObjectInspector) inspector;
+ numWriters += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
+ numWriters += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
+ break;
+ case LIST:
+ numWriters += 1;
+ ListObjectInspector listInsp = (ListObjectInspector) inspector;
+ numWriters += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
+ break;
+ case UNION:
+ numWriters += 1;
+ UnionObjectInspector unionInsp = (UnionObjectInspector) inspector;
+ List<ObjectInspector> choices = unionInsp.getObjectInspectors();
+ for (int i = 0; i < choices.size(); ++i) {
+ numWriters += getFlattenedColumnsCount(choices.get(i));
+ }
+ break;
+ default:
+ throw new IllegalArgumentException("Bad category: " +
+ inspector.getCategory());
+ }
+ return numWriters;
+ }
+
+}
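
Editor's note: to make the flattening concrete, a worked sketch of the includeColumns javadoc example. The inspector is assumed (hypothetically) to describe a struct<a:int,b:float,c:array<string>,d:string> row type built elsewhere:

```java
// Flattened layout for struct<a:int,b:float,c:array<string>,d:string>:
// root=0, a=1, b=2, c=3..4 (list container + element), d=5.
boolean[] included = OrcUtils.includeColumns("a,b,c", "a,b,c,d", rowInspector);
// included -> [false, true, true, true, true, false]

// getFlattenedColumnsCount by itself: a primitive counts 1; a map<string,int>
// counts 3 (container + key + value), which is why column c spans [3,5] in the
// getColumnSpan comment above.
int n = OrcUtils.getFlattenedColumnsCount(rowInspector); // 6 for this struct
```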
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index f7fce3f..0f68b5f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -19,19 +19,7 @@
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_ZEROCOPY;
-import java.io.EOFException;
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.nio.ByteBuffer;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
+import com.google.common.collect.ComparisonChain;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.builder.HashCodeBuilder;
@@ -51,6 +39,7 @@
import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -74,7 +63,20 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import com.google.common.collect.ComparisonChain;
+import java.io.EOFException;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
class RecordReaderImpl implements RecordReader {
@@ -2366,11 +2368,11 @@ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index,
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index);
Object minValue = getMin(cs);
Object maxValue = getMax(cs);
- return evaluatePredicateRange(predicate, minValue, maxValue);
+ return evaluatePredicateRange(predicate, minValue, maxValue, cs.getBloomFilter());
}
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
- Object max) {
+ Object max, BloomFilter bloomFilter) {
// if we didn't have any values, everything must have been null
if (min == null) {
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2395,89 +2397,107 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
Object predObj = getBaseObjectForComparison(baseObj, minValue);
switch (predicate.getOperator()) {
- case NULL_SAFE_EQUALS:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (loc == Location.BEFORE || loc == Location.AFTER) {
- return TruthValue.NO;
- } else {
- return TruthValue.YES_NO;
- }
- case EQUALS:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (minValue.equals(maxValue) && loc == Location.MIN) {
- return TruthValue.YES_NULL;
- } else if (loc == Location.BEFORE || loc == Location.AFTER) {
- return TruthValue.NO_NULL;
- } else {
- return TruthValue.YES_NO_NULL;
- }
- case LESS_THAN:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (loc == Location.AFTER) {
- return TruthValue.YES_NULL;
- } else if (loc == Location.BEFORE || loc == Location.MIN) {
- return TruthValue.NO_NULL;
- } else {
- return TruthValue.YES_NO_NULL;
- }
- case LESS_THAN_EQUALS:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (loc == Location.AFTER || loc == Location.MAX) {
- return TruthValue.YES_NULL;
- } else if (loc == Location.BEFORE) {
- return TruthValue.NO_NULL;
- } else {
- return TruthValue.YES_NO_NULL;
- }
- case IN:
- if (minValue.equals(maxValue)) {
- // for a single value, look through to see if that value is in the
- // set
- for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
- predObj = getBaseObjectForComparison(arg, minValue);
+ case NULL_SAFE_EQUALS:
+ loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ if (loc == Location.BEFORE || loc == Location.AFTER) {
+ return TruthValue.NO;
+ } else {
+ return TruthValue.YES_NO;
+ }
+ case EQUALS:
+ if (bloomFilter != null) {
+ return checkInBloomFilter(bloomFilter, predObj);
+ } else {
loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (loc == Location.MIN) {
+ if (minValue.equals(maxValue) && loc == Location.MIN) {
return TruthValue.YES_NULL;
+ } else if (loc == Location.BEFORE || loc == Location.AFTER) {
+ return TruthValue.NO_NULL;
+ } else {
+ return TruthValue.YES_NO_NULL;
}
}
- return TruthValue.NO_NULL;
- } else {
- // are all of the values outside of the range?
- for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
- predObj = getBaseObjectForComparison(arg, minValue);
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (loc == Location.MIN || loc == Location.MIDDLE ||
- loc == Location.MAX) {
- return TruthValue.YES_NO_NULL;
+ case LESS_THAN:
+ loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ if (loc == Location.AFTER) {
+ return TruthValue.YES_NULL;
+ } else if (loc == Location.BEFORE || loc == Location.MIN) {
+ return TruthValue.NO_NULL;
+ } else {
+ return TruthValue.YES_NO_NULL;
+ }
+ case LESS_THAN_EQUALS:
+ loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ if (loc == Location.AFTER || loc == Location.MAX) {
+ return TruthValue.YES_NULL;
+ } else if (loc == Location.BEFORE) {
+ return TruthValue.NO_NULL;
+ } else {
+ return TruthValue.YES_NO_NULL;
+ }
+ case IN:
+ if (minValue.equals(maxValue)) {
+ // for a single value, look through to see if that value is in the
+ // set
+ for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
+ predObj = getBaseObjectForComparison(arg, minValue);
+
+ if (bloomFilter != null) {
+ if (checkInBloomFilter(bloomFilter, predObj) != TruthValue.NO_NULL) {
+ return TruthValue.YES_NO_NULL;
+ }
+ } else {
+ loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ if (loc == Location.MIN) {
+ return TruthValue.YES_NULL;
+ }
+ }
}
+ return TruthValue.NO_NULL;
+ } else {
+ // are all of the values outside of the range?
+ for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
+ predObj = getBaseObjectForComparison(arg, minValue);
+
+ if (bloomFilter != null) {
+ if (checkInBloomFilter(bloomFilter, predObj) != TruthValue.NO_NULL) {
+ return TruthValue.YES_NO_NULL;
+ }
+ } else {
+ loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ if (loc == Location.MIN || loc == Location.MIDDLE ||
+ loc == Location.MAX) {
+ return TruthValue.YES_NO_NULL;
+ }
+ }
+ }
+ return TruthValue.NO_NULL;
}
- return TruthValue.NO_NULL;
- }
- case BETWEEN:
- List