stripes = reader.getStripes();
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index b46937c..25bb15a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -18,16 +18,23 @@
package org.apache.hadoop.hive.ql.io.orc;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT;
+
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.*;
-
/**
* Contains factory methods to read or write ORC files.
*/
@@ -148,7 +155,9 @@ private WriterVersion(int id) {
ROW_INDEX_STRIDE("orc.row.index.stride"),
ENABLE_INDEXES("orc.create.index"),
BLOCK_PADDING("orc.block.padding"),
- ENCODING_STRATEGY("orc.encoding.strategy");
+ ENCODING_STRATEGY("orc.encoding.strategy"),
+ BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns"),
+ BLOOM_FILTER_FPP("orc.bloom.filter.fpp");
private final String propName;
@@ -256,6 +265,8 @@ public static Reader createReader(Path path,
private EncodingStrategy encodingStrategy;
private CompressionStrategy compressionStrategy;
private float paddingTolerance;
+ private String bloomFilterColumns;
+ private double bloomFilterFpp;
WriterOptions(Configuration conf) {
configuration = conf;
@@ -288,9 +299,9 @@ public static Reader createReader(Path path,
compressionStrategy = CompressionStrategy.valueOf(compString);
}
- paddingTolerance =
- conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname,
- HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal);
+ paddingTolerance = conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname,
+ HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal);
+ bloomFilterFpp = BloomFilter.DEFAULT_FPP;
}
/**
@@ -367,6 +378,24 @@ public WriterOptions paddingTolerance(float value) {
}
/**
+ * Comma-separated list of column names for which bloom filters are to be created.
+ */
+ public WriterOptions bloomFilterColumns(String columns) {
+ bloomFilterColumns = columns;
+ return this;
+ }
+
+ /**
+ * Specify the false positive probability for the bloom filter.
+ * @param fpp - false positive probability
+ * @return this
+ */
+ public WriterOptions bloomFilterFpp(double fpp) {
+ bloomFilterFpp = fpp;
+ return this;
+ }
+
+ /**
* Sets the generic compression that is used to compress the data.
*/
public WriterOptions compress(CompressionKind value) {
@@ -438,8 +467,8 @@ public static Writer createWriter(Path path,
opts.memoryManagerValue, opts.blockPaddingValue,
opts.versionValue, opts.callback,
opts.encodingStrategy, opts.compressionStrategy,
- opts.paddingTolerance,
- opts.blockSizeValue);
+ opts.paddingTolerance, opts.blockSizeValue,
+ opts.bloomFilterColumns, opts.bloomFilterFpp);
}
/**
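Usage sketch (editor's illustration, not part of the patch): the two new options compose with the existing fluent WriterOptions API. The output path and column list below are hypothetical, and `inspector` is assumed to describe the rows being written.

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

public class BloomFilterWriterExample {
  // Opens an ORC writer that builds bloom filters for the (hypothetical) columns id and name.
  static Writer open(Configuration conf, ObjectInspector inspector) throws IOException {
    return OrcFile.createWriter(new Path("/tmp/bloom_example.orc"),
        OrcFile.writerOptions(conf)
            .inspector(inspector)
            .bloomFilterColumns("id,name") // comma-separated column names
            .bloomFilterFpp(0.01));        // 1% false positive probability
  }
}
```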
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 8c44e3e..498ee14 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,9 +18,18 @@
package org.apache.hadoop.hive.ql.io.orc;
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -65,18 +74,9 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.TreeMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* A MapReduce/Hive input format for ORC files.
*
@@ -920,7 +920,7 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
// column statistics at index 0 contains only the number of rows
ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
- truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred));
+ truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
} else {
// partition column case.
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index 5bd3f0c..8625ff1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -17,12 +17,17 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Properties;
+
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -41,11 +46,6 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Properties;
-
/**
* A Hive OutputFormat for ORC files.
*/
@@ -170,6 +170,16 @@ private String getSettingFromPropsFallingBackToConf(String key, Properties props
options.encodingStrategy(EncodingStrategy.valueOf(propVal));
}
+ if ((propVal = getSettingFromPropsFallingBackToConf(
+ OrcFile.OrcTableProperties.BLOOM_FILTER_COLUMNS.getPropName(), props, conf)) != null) {
+ options.bloomFilterColumns(propVal);
+ }
+
+ if ((propVal = getSettingFromPropsFallingBackToConf(
+ OrcFile.OrcTableProperties.BLOOM_FILTER_FPP.getPropName(), props, conf)) != null) {
+ options.bloomFilterFpp(Double.parseDouble(propVal));
+ }
+
return options;
}
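For orientation, a sketch under the assumption that getSettingFromPropsFallingBackToConf implements plain props-then-conf precedence; `fromPropsOrConf` below is a hypothetical stand-in, not the actual helper.

```java
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;

public class BloomFilterPropsExample {
  // Table properties win; the job configuration is consulted only when the key is absent.
  static String fromPropsOrConf(String key, Properties props, Configuration conf) {
    String value = props.getProperty(key);
    return value != null ? value : conf.get(key);
  }

  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("orc.bloom.filter.columns", "id,name");
    props.setProperty("orc.bloom.filter.fpp", "0.01");
    Configuration conf = new Configuration();

    String cols = fromPropsOrConf("orc.bloom.filter.columns", props, conf);
    double fpp = Double.parseDouble(fromPropsOrConf("orc.bloom.filter.fpp", props, conf));
    System.out.println(cols + " @ fpp=" + fpp); // id,name @ fpp=0.01
  }
}
```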
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
new file mode 100644
index 0000000..ba59b35
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+
+import com.google.common.collect.Lists;
+
+public class OrcUtils {
+ private static final Log LOG = LogFactory.getLog(OrcUtils.class);
+
+ /**
+ * Returns selected columns as a boolean array with true value set for specified column names.
+ * The result contains as many elements as there are flattened columns.
+ * For example:
+ * selectedColumns - a,b,c
+ * allColumns - a,b,c,d
+ * If column c is a complex type, say list<string>, and the other types are primitives, then
+ * the result will be [false, true, true, true, true, false].
+ * Index 0 is the root element of the struct, which is set to false by default; indices 1 and 2
+ * correspond to columns a and b. Indices 3 and 4 correspond to column c, which is a list<string>
+ * and flattens to 2 columns, and index 5 corresponds to column d.
+ *
+ * @param selectedColumns - comma separated list of selected column names
+ * @param allColumns - comma separated list of all column names
+ * @param inspector - object inspector
+ * @return - boolean array with true value set for the specified column names
+ */
+ public static boolean[] includeColumns(String selectedColumns, String allColumns,
+ ObjectInspector inspector) {
+ int numFlattenedCols = getFlattenedColumnsCount(inspector);
+ boolean[] results = new boolean[numFlattenedCols];
+ if (selectedColumns != null && !selectedColumns.isEmpty()) {
+ includeColumnsImpl(results, selectedColumns, allColumns, inspector);
+ }
+ return results;
+ }
+
+ private static void includeColumnsImpl(boolean[] includeColumns, String selectedColumns,
+ String allColumns,
+ ObjectInspector inspector) {
+ Map<String, List<Integer>> columnSpanMap = getColumnSpan(allColumns, inspector);
+ LOG.info("columnSpanMap: " + columnSpanMap);
+
+ String[] selCols = selectedColumns.split(",");
+ for (String sc : selCols) {
+ if (columnSpanMap.containsKey(sc)) {
+ List<Integer> colSpan = columnSpanMap.get(sc);
+ int start = colSpan.get(0);
+ int end = colSpan.get(1);
+ for (int i = start; i <= end; i++) {
+ includeColumns[i] = true;
+ }
+ }
+ }
+
+ LOG.info("includeColumns: " + Arrays.toString(includeColumns));
+ }
+
+ private static Map<String, List<Integer>> getColumnSpan(String allColumns,
+ ObjectInspector inspector) {
+ // map that contains the column span for each column. Column span is the number of columns
+ // required after flattening. For a given object inspector this map contains the start column
+ // id and end column id (both inclusive) after flattening.
+ // EXAMPLE:
+ // schema: struct<a:int,b:float,c:map<string,int>>
+ // column span map for the above struct will be
+ // a => [1,1], b => [2,2], c => [3,5]
+ Map<String, List<Integer>> columnSpanMap = new HashMap<String, List<Integer>>();
+ if (allColumns != null) {
+ String[] columns = allColumns.split(",");
+ int startIdx = 0;
+ int endIdx = 0;
+ if (inspector instanceof StructObjectInspector) {
+ StructObjectInspector soi = (StructObjectInspector) inspector;
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ for (int i = 0; i < fields.size(); i++) {
+ StructField sf = fields.get(i);
+
+ // we get the type (category) from object inspector but column name from the argument.
+ // The reason for this is hive (FileSinkOperator) does not pass the actual column names,
+ // instead it passes the internal column names (_col1,_col2).
+ ObjectInspector sfOI = sf.getFieldObjectInspector();
+ String colName = columns[i];
+
+ startIdx = endIdx + 1;
+ switch (sfOI.getCategory()) {
+ case PRIMITIVE:
+ endIdx += 1;
+ break;
+ case STRUCT:
+ endIdx += 1;
+ StructObjectInspector structInsp = (StructObjectInspector) sfOI;
+ List<? extends StructField> structFields = structInsp.getAllStructFieldRefs();
+ for (int j = 0; j < structFields.size(); ++j) {
+ endIdx += getFlattenedColumnsCount(structFields.get(j).getFieldObjectInspector());
+ }
+ break;
+ case MAP:
+ endIdx += 1;
+ MapObjectInspector mapInsp = (MapObjectInspector) sfOI;
+ endIdx += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
+ endIdx += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
+ break;
+ case LIST:
+ endIdx += 1;
+ ListObjectInspector listInsp = (ListObjectInspector) sfOI;
+ endIdx += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
+ break;
+ case UNION:
+ endIdx += 1;
+ UnionObjectInspector unionInsp = (UnionObjectInspector) sfOI;
+ List<ObjectInspector> choices = unionInsp.getObjectInspectors();
+ for (int j = 0; j < choices.size(); ++j) {
+ endIdx += getFlattenedColumnsCount(choices.get(j));
+ }
+ break;
+ default:
+ throw new IllegalArgumentException("Bad category: " +
+ inspector.getCategory());
+ }
+
+ columnSpanMap.put(colName, Lists.newArrayList(startIdx, endIdx));
+ }
+ }
+ }
+ return columnSpanMap;
+ }
+
+ /**
+ * Returns the number of columns after flattening complex types.
+ *
+ * @param inspector - object inspector
+ * @return - the number of columns after flattening
+ */
+ public static int getFlattenedColumnsCount(ObjectInspector inspector) {
+ int numWriters = 0;
+ switch (inspector.getCategory()) {
+ case PRIMITIVE:
+ numWriters += 1;
+ break;
+ case STRUCT:
+ numWriters += 1;
+ StructObjectInspector structInsp = (StructObjectInspector) inspector;
+ List<? extends StructField> fields = structInsp.getAllStructFieldRefs();
+ for (int i = 0; i < fields.size(); ++i) {
+ numWriters += getFlattenedColumnsCount(fields.get(i).getFieldObjectInspector());
+ }
+ break;
+ case MAP:
+ numWriters += 1;
+ MapObjectInspector mapInsp = (MapObjectInspector) inspector;
+ numWriters += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
+ numWriters += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
+ break;
+ case LIST:
+ numWriters += 1;
+ ListObjectInspector listInsp = (ListObjectInspector) inspector;
+ numWriters += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
+ break;
+ case UNION:
+ numWriters += 1;
+ UnionObjectInspector unionInsp = (UnionObjectInspector) inspector;
+ List<ObjectInspector> choices = unionInsp.getObjectInspectors();
+ for (int i = 0; i < choices.size(); ++i) {
+ numWriters += getFlattenedColumnsCount(choices.get(i));
+ }
+ break;
+ default:
+ throw new IllegalArgumentException("Bad category: " +
+ inspector.getCategory());
+ }
+ return numWriters;
+ }
+
+}
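To make the flattening arithmetic concrete, here is a small sketch using the standard ObjectInspector factories; it reproduces the Javadoc's span example for a struct<a:int,b:float,c:map<string,int>> schema (the field types are illustrative).

```java
import java.util.Arrays;

import org.apache.hadoop.hive.ql.io.orc.OrcUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class OrcUtilsExample {
  public static void main(String[] args) {
    // struct<a:int,b:float,c:map<string,int>>: root + a + b + (map + key + value) = 6 columns
    ObjectInspector mapOI = ObjectInspectorFactory.getStandardMapObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    ObjectInspector structOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("a", "b", "c"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaFloatObjectInspector,
            mapOI));

    System.out.println(OrcUtils.getFlattenedColumnsCount(structOI)); // 6
    // spans: a => [1,1], b => [2,2], c => [3,5]; selecting only c:
    boolean[] include = OrcUtils.includeColumns("c", "a,b,c", structOI);
    System.out.println(Arrays.toString(include)); // [false, false, false, true, true, true]
  }
}
```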
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 21e59dd..136d575 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -24,6 +24,7 @@
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
+import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
@@ -35,6 +36,7 @@
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -51,6 +53,7 @@
import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -101,11 +104,14 @@
List<BufferChunk> bufferChunks = new ArrayList<BufferChunk>(0);
private final TreeReader reader;
private final OrcProto.RowIndex[] indexes;
+ private final OrcProto.BloomFilterIndex[] bloomFilterIndices;
private final SearchArgument sarg;
// the leaf predicates for the sarg
private final List<PredicateLeaf> sargLeaves;
// an array the same length as the sargLeaves that map them to column ids
private final int[] filterColumns;
+ // same as the above array, but indices are set to true
+ private final boolean[] sargColumns;
// an array about which row groups aren't skipped
private boolean[] includedRowGroups = null;
private final Configuration conf;
@@ -113,6 +119,24 @@
private final ByteBufferAllocatorPool pool = new ByteBufferAllocatorPool();
private final ZeroCopyReaderShim zcr;
+ public final static class Index {
+ OrcProto.RowIndex[] rowGroupIndex;
+ OrcProto.BloomFilterIndex[] bloomFilterIndex;
+
+ Index(OrcProto.RowIndex[] rgIndex, OrcProto.BloomFilterIndex[] bfIndex) {
+ this.rowGroupIndex = rgIndex;
+ this.bloomFilterIndex = bfIndex;
+ }
+
+ public OrcProto.RowIndex[] getRowGroupIndex() {
+ return rowGroupIndex;
+ }
+
+ public OrcProto.BloomFilterIndex[] getBloomFilterIndex() {
+ return bloomFilterIndex;
+ }
+ }
+
// this is an implementation copied from ElasticByteBufferPool in hadoop-2,
// which lacks a clear()/clean() operation
public final static class ByteBufferAllocatorPool implements ByteBufferPoolShim {
@@ -251,9 +275,18 @@ static int findColumns(String[] columnNames,
if (sarg != null) {
sargLeaves = sarg.getLeaves();
filterColumns = mapSargColumns(sargLeaves, options.getColumnNames(), 0);
+ // included will not be null; the row options fill the array with trues if it was null
+ sargColumns = new boolean[included.length];
+ for (int i : filterColumns) {
+ // filter columns may have -1 as index which could be partition column in SARG.
+ if (i > 0) {
+ sargColumns[i] = true;
+ }
+ }
} else {
sargLeaves = null;
filterColumns = null;
+ sargColumns = null;
}
long rows = 0;
long skippedRows = 0;
@@ -285,6 +318,7 @@ static int findColumns(String[] columnNames,
totalRowCount = rows;
reader = createTreeReader(path, 0, types, included, conf);
indexes = new OrcProto.RowIndex[types.size()];
+ bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
rowIndexStride = strideRate;
advanceToNextRow(0L);
}
@@ -1387,7 +1421,7 @@ void skipRows(long items) throws IOException {
}
len -= bytesRead;
offset += bytesRead;
- }
+ }
return allBytes;
}
@@ -1741,7 +1775,7 @@ Object nextVector(Object previousVector, long batchSize) throws IOException {
}
}
} else {
- if (result.noNulls){
+ if (result.noNulls){
for (int i = 0; i < batchSize; i++) {
adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i], maxLength);
if (adjustedDownLen < result.length[i]) {
@@ -1805,7 +1839,7 @@ Object nextVector(Object previousVector, long batchSize) throws IOException {
}
}
} else {
- if (result.noNulls){
+ if (result.noNulls){
for (int i = 0; i < batchSize; i++) {
adjustedDownLen = StringExpr.truncate(result.vector[i], result.start[i], result.length[i], maxLength);
if (adjustedDownLen < result.length[i]) {
@@ -2362,15 +2396,20 @@ static Object getMin(ColumnStatistics index) {
* that is referenced in the predicate.
* @param statsProto the statistics for the column mentioned in the predicate
* @param predicate the leaf predicate we need to evaluation
+ * @param bloomFilter - the bloom filter for the column, or null if none was written
* @return the set of truth values that may be returned for the given
* predicate.
*/
- static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
- PredicateLeaf predicate) {
+ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto,
+ PredicateLeaf predicate, OrcProto.BloomFilter bloomFilter) {
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
Object minValue = getMin(cs);
Object maxValue = getMax(cs);
- return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull());
+ BloomFilter bf = null;
+ if (bloomFilter != null) {
+ bf = new BloomFilter(bloomFilter);
+ }
+ return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(), bf);
}
/**
@@ -2382,14 +2421,14 @@ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
* predicate.
*/
static TruthValue evaluatePredicate(ColumnStatistics stats,
- PredicateLeaf predicate) {
+ PredicateLeaf predicate, BloomFilter bloomFilter) {
Object minValue = getMin(stats);
Object maxValue = getMax(stats);
- return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull());
+ return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter);
}
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
- Object max, boolean hasNull) {
+ Object max, boolean hasNull, BloomFilter bloomFilter) {
// if we didn't have any values, everything must have been null
if (min == null) {
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2399,21 +2438,42 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
}
}
- Location loc;
+ TruthValue result;
+ // Predicate object and stats object can be one of the following base types
+ // LONG, DOUBLE, STRING, DATE, DECIMAL
+ // Out of these DATE is not implicitly convertible to other types and rest
+ // others are implicitly convertible. In cases where DATE cannot be converted
+ // the stats object is converted to text and comparison is performed.
+ // When STRINGs are converted to other base types, NumberFormat exception
+ // can occur in which case TruthValue.YES_NO_NULL value is returned
try {
- // Predicate object and stats object can be one of the following base types
- // LONG, DOUBLE, STRING, DATE, DECIMAL
- // Out of these DATE is not implicitly convertible to other types and rest
- // others are implicitly convertible. In cases where DATE cannot be converted
- // the stats object is converted to text and comparison is performed.
- // When STRINGs are converted to other base types, NumberFormat exception
- // can occur in which case TruthValue.YES_NO_NULL value is returned
Object baseObj = predicate.getLiteral(PredicateLeaf.FileFormat.ORC);
Object minValue = getConvertedStatsObj(min, baseObj);
Object maxValue = getConvertedStatsObj(max, baseObj);
Object predObj = getBaseObjectForComparison(baseObj, minValue);
- switch (predicate.getOperator()) {
+ result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull);
+ if (bloomFilter != null && result != TruthValue.NO_NULL && result != TruthValue.NO) {
+ result = evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull);
+ }
+ // in case of a failed conversion, return the default YES_NO_NULL truth value
+ } catch (NumberFormatException nfe) {
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("NumberFormatException when type matching predicate object" +
+ " and statistics object. Exception: " + ExceptionUtils.getStackTrace(nfe));
+ }
+ result = hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ }
+ return result;
+ }
+
+ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Object predObj,
+ Object minValue,
+ Object maxValue,
+ boolean hasNull) {
+ Location loc;
+
+ switch (predicate.getOperator()) {
case NULL_SAFE_EQUALS:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.BEFORE || loc == Location.AFTER) {
@@ -2498,12 +2558,81 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
return hasNull ? TruthValue.YES_NO : TruthValue.NO;
default:
return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ }
+ }
+
+ private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate, Object predObj,
+ BloomFilter bloomFilter, boolean hasNull) {
+ switch (predicate.getOperator()) {
+ case NULL_SAFE_EQUALS:
+ // null safe equals does not return *_NULL variant. So set hasNull to false
+ return checkInBloomFilter(bloomFilter, predObj, false);
+ case EQUALS:
+ return checkInBloomFilter(bloomFilter, predObj, hasNull);
+ case IN:
+ for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
+ // if at least one value in the IN list exists in the bloom filter, qualify the row group/stripe
+ TruthValue result = checkInBloomFilter(bloomFilter, arg, hasNull);
+ if (result == TruthValue.YES_NO_NULL || result == TruthValue.YES_NO) {
+ return result;
+ }
+ }
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+ default:
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ }
+ }
+
+ private static TruthValue checkInBloomFilter(BloomFilter bf, Object predObj, boolean hasNull) {
+ TruthValue result = hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+
+ if (predObj instanceof Long) {
+ if (bf.testLong(((Long) predObj).longValue())) {
+ result = TruthValue.YES_NO_NULL;
}
+ } else if (predObj instanceof Double) {
+ if (bf.testDouble(((Double) predObj).doubleValue())) {
+ result = TruthValue.YES_NO_NULL;
+ }
+ } else if (predObj instanceof String || predObj instanceof Text ||
+ predObj instanceof HiveDecimal || predObj instanceof BigDecimal) {
+ if (bf.testString(predObj.toString())) {
+ result = TruthValue.YES_NO_NULL;
+ }
+ } else if (predObj instanceof Date) {
+ if (bf.testLong(DateWritable.dateToDays((Date) predObj))) {
+ result = TruthValue.YES_NO_NULL;
+ }
+ } else if (predObj instanceof DateWritable) {
+ if (bf.testLong(((DateWritable) predObj).getDays())) {
+ result = TruthValue.YES_NO_NULL;
+ }
+ } else if (predObj instanceof Timestamp) {
+ if (bf.testLong(((Timestamp) predObj).getTime())) {
+ result = TruthValue.YES_NO_NULL;
+ }
+ } else if (predObj instanceof TimestampWritable) {
+ if (bf.testLong(((TimestampWritable) predObj).getTimestamp().getTime())) {
+ result = TruthValue.YES_NO_NULL;
+ }
+ } else {
+ // if the predicate object is null and if hasNull says there are no nulls then return NO
+ if (predObj == null && !hasNull) {
+ result = TruthValue.NO;
+ } else {
+ result = TruthValue.YES_NO_NULL;
+ }
+ }
- // in case failed conversion, return the default YES_NO_NULL truth value
- } catch (NumberFormatException nfe) {
- return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ if (result == TruthValue.YES_NO_NULL && !hasNull) {
+ result = TruthValue.YES_NO;
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Bloom filter evaluation: " + result.toString());
}
+
+ return result;
}
private static Object getBaseObjectForComparison(Object predObj, Object statsObj) {
@@ -2571,7 +2700,7 @@ private static Object getConvertedStatsObj(Object statsObj, Object predObj) {
if (sarg == null || rowIndexStride == 0) {
return null;
}
- readRowIndex(currentStripe);
+ readRowIndex(currentStripe, sargColumns);
long rowsInStripe = stripes.get(currentStripe).getNumberOfRows();
int groupsInStripe = (int) ((rowsInStripe + rowIndexStride - 1) /
rowIndexStride);
@@ -2582,7 +2711,11 @@ private static Object getConvertedStatsObj(Object statsObj, Object predObj) {
if (filterColumns[pred] != -1) {
OrcProto.ColumnStatistics stats =
indexes[filterColumns[pred]].getEntry(rowGroup).getStatistics();
- leafValues[pred] = evaluatePredicate(stats, sargLeaves.get(pred));
+ OrcProto.BloomFilter bf = null;
+ if (bloomFilterIndices[filterColumns[pred]] != null) {
+ bf = bloomFilterIndices[filterColumns[pred]].getBloomFilter(rowGroup);
+ }
+ leafValues[pred] = evaluatePredicateProto(stats, sargLeaves.get(pred), bf);
if (LOG.isDebugEnabled()) {
LOG.debug("Stats = " + stats);
LOG.debug("Setting " + sargLeaves.get(pred) + " to " +
@@ -3225,7 +3358,7 @@ private int findStripe(long rowNumber) {
throw new IllegalArgumentException("Seek after the end of reader range");
}
- OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException {
+ Index readRowIndex(int stripeIndex, boolean[] sargColumns) throws IOException {
long offset = stripes.get(stripeIndex).getOffset();
OrcProto.StripeFooter stripeFooter;
OrcProto.RowIndex[] indexes;
@@ -3237,21 +3370,45 @@ private int findStripe(long rowNumber) {
stripeFooter = readStripeFooter(stripes.get(stripeIndex));
indexes = new OrcProto.RowIndex[this.indexes.length];
}
- for(OrcProto.Stream stream: stripeFooter.getStreamsList()) {
+ List<OrcProto.Stream> streams = stripeFooter.getStreamsList();
+ for (int i = 0; i < streams.size(); i++) {
+ OrcProto.Stream stream = streams.get(i);
+ OrcProto.Stream nextStream = null;
+ if (i < streams.size() - 1) {
+ nextStream = streams.get(i+1);
+ }
+ int col = stream.getColumn();
+ int len = (int) stream.getLength();
+ // row index and bloom filter streams are interlaced; if the sarg column has a bloom
+ // filter, combine the IO so its row index and bloom filter are read together
if (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX) {
- int col = stream.getColumn();
+ boolean readBloomFilter = false;
+ if (sargColumns != null && sargColumns[col] && nextStream != null &&
+ nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
+ len += nextStream.getLength();
+ i += 1;
+ readBloomFilter = true;
+ }
if ((included == null || included[col]) && indexes[col] == null) {
- byte[] buffer = new byte[(int) stream.getLength()];
+ byte[] buffer = new byte[len];
file.seek(offset);
file.readFully(buffer);
+ ByteBuffer[] bb = new ByteBuffer[] {ByteBuffer.wrap(buffer)};
indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
- new ByteBuffer[] {ByteBuffer.wrap(buffer)}, new long[]{0},
- stream.getLength(), codec, bufferSize));
+ bb, new long[]{0}, stream.getLength(), codec, bufferSize));
+ if (readBloomFilter) {
+ bb[0].position((int) stream.getLength());
+ bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(
+ InStream.create("bloom_filter", bb, new long[]{0}, nextStream.getLength(),
+ codec, bufferSize));
+ }
}
}
- offset += stream.getLength();
+ offset += len;
}
- return indexes;
+
+ return new Index(indexes, bloomFilterIndices);
}
private void seekToRowEntry(int rowEntry) throws IOException {
@@ -3283,7 +3440,7 @@ public void seekToRow(long rowNumber) throws IOException {
currentStripe = rightStripe;
readStripe();
}
- readRowIndex(currentStripe);
+ readRowIndex(currentStripe, sargColumns);
// if we aren't to the right row yet, advance in the stripe.
advanceToNextRow(rowNumber);
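The evaluation order wired in above is range check first, bloom filter second: the filter is consulted only when min/max cannot already eliminate the row group. A minimal standalone sketch of that two-stage check, using only BloomFilter calls visible in this patch (the values are illustrative):

```java
import org.apache.hadoop.hive.ql.io.filters.BloomFilter;

public class TwoStageEliminationExample {
  public static void main(String[] args) {
    BloomFilter bf = new BloomFilter(10000, 0.05); // expected entries, fpp
    bf.addLong(42);

    long predicate = 17;
    long min = 0, max = 100;

    // Stage 1: min/max. 17 lies inside [0, 100], so the range check cannot skip the group.
    boolean mayContain = predicate >= min && predicate <= max;
    // Stage 2: bloom filter. 42 is the only value added, so 17 tests negative.
    if (mayContain) {
      mayContain = bf.testLong(predicate);
    }
    System.out.println(mayContain); // false => this row group can be skipped
  }
}
```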
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
index 3d44954..6215791 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
@@ -75,6 +75,7 @@ public static Area getArea(OrcProto.Stream.Kind kind) {
switch (kind) {
case ROW_INDEX:
case DICTIONARY_COUNT:
+ case BLOOM_FILTER:
return Area.INDEX;
default:
return Area.DATA;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 2d000ec..9f24190 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -20,10 +20,16 @@
import static com.google.common.base.Preconditions.checkArgument;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Lists;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.CodedOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.management.ManagementFactory;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -34,6 +40,7 @@
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.ql.io.orc.CompressionCodec.Modifier;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy;
@@ -70,16 +77,12 @@
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.lang.management.ManagementFactory;
-import java.nio.ByteBuffer;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.EnumSet;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Longs;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedOutputStream;
/**
* An ORC file writer. The file is divided into stripes, which is the natural
@@ -145,23 +148,27 @@
private final OrcFile.WriterContext callbackContext;
private final OrcFile.EncodingStrategy encodingStrategy;
private final OrcFile.CompressionStrategy compressionStrategy;
+ private final boolean[] bloomFilterColumns;
+ private final double bloomFilterFpp;
WriterImpl(FileSystem fs,
- Path path,
- Configuration conf,
- ObjectInspector inspector,
- long stripeSize,
- CompressionKind compress,
- int bufferSize,
- int rowIndexStride,
- MemoryManager memoryManager,
- boolean addBlockPadding,
- OrcFile.Version version,
- OrcFile.WriterCallback callback,
- OrcFile.EncodingStrategy encodingStrategy,
- CompressionStrategy compressionStrategy,
- float paddingTolerance,
- long blockSizeValue) throws IOException {
+ Path path,
+ Configuration conf,
+ ObjectInspector inspector,
+ long stripeSize,
+ CompressionKind compress,
+ int bufferSize,
+ int rowIndexStride,
+ MemoryManager memoryManager,
+ boolean addBlockPadding,
+ OrcFile.Version version,
+ OrcFile.WriterCallback callback,
+ EncodingStrategy encodingStrategy,
+ CompressionStrategy compressionStrategy,
+ float paddingTolerance,
+ long blockSizeValue,
+ String bloomFilterColumnNames,
+ double bloomFilterFpp) throws IOException {
this.fs = fs;
this.path = path;
this.conf = conf;
@@ -190,7 +197,13 @@ public Writer getWriter() {
this.memoryManager = memoryManager;
buildIndex = rowIndexStride > 0;
codec = createCodec(compress);
- this.bufferSize = getEstimatedBufferSize(bufferSize);
+ String allColumns = conf.get(IOConstants.COLUMNS);
+ if (allColumns == null) {
+ allColumns = getColumnNamesFromInspector(inspector);
+ }
+ this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize);
+ this.bloomFilterColumns = OrcUtils.includeColumns(bloomFilterColumnNames, allColumns, inspector);
+ this.bloomFilterFpp = bloomFilterFpp;
treeWriter = createTreeWriter(inspector, streamFactory, false);
if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
throw new IllegalArgumentException("Row stride must be at least " +
@@ -201,8 +214,25 @@ public Writer getWriter() {
memoryManager.addWriter(path, stripeSize, this);
}
+ private String getColumnNamesFromInspector(ObjectInspector inspector) {
+ List<String> fieldNames = Lists.newArrayList();
+ Joiner joiner = Joiner.on(",");
+ if (inspector instanceof StructObjectInspector) {
+ StructObjectInspector soi = (StructObjectInspector) inspector;
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ for(StructField sf : fields) {
+ fieldNames.add(sf.getFieldName());
+ }
+ }
+ return joiner.join(fieldNames);
+ }
+
+ @VisibleForTesting
int getEstimatedBufferSize(int bs) {
- String colNames = conf.get(IOConstants.COLUMNS);
+ return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs);
+ }
+
+ int getEstimatedBufferSize(String colNames, int bs) {
long availableMem = getMemoryAvailableForORC();
if (colNames != null) {
final int numCols = colNames.split(",").length;
@@ -459,26 +489,27 @@ public OutStream createStream(int column,
final EnumSet<Modifier> modifiers;
switch (kind) {
- case DATA:
- case DICTIONARY_DATA:
- if (getCompressionStrategy() == CompressionStrategy.SPEED) {
- modifiers = EnumSet.of(Modifier.FAST, Modifier.TEXT);
- } else {
- modifiers = EnumSet.of(Modifier.DEFAULT, Modifier.TEXT);
- }
- break;
- case LENGTH:
- case DICTIONARY_COUNT:
- case PRESENT:
- case ROW_INDEX:
- case SECONDARY:
- // easily compressed using the fastest modes
- modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY);
- break;
- default:
- LOG.warn("Missing ORC compression modifiers for " + kind);
- modifiers = null;
- break;
+ case BLOOM_FILTER:
+ case DATA:
+ case DICTIONARY_DATA:
+ if (getCompressionStrategy() == CompressionStrategy.SPEED) {
+ modifiers = EnumSet.of(Modifier.FAST, Modifier.TEXT);
+ } else {
+ modifiers = EnumSet.of(Modifier.DEFAULT, Modifier.TEXT);
+ }
+ break;
+ case LENGTH:
+ case DICTIONARY_COUNT:
+ case PRESENT:
+ case ROW_INDEX:
+ case SECONDARY:
+ // easily compressed using the fastest modes
+ modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY);
+ break;
+ default:
+ LOG.warn("Missing ORC compression modifiers for " + kind);
+ modifiers = null;
+ break;
}
BufferedStream result = streams.get(name);
@@ -499,6 +530,15 @@ public int getNextColumnId() {
}
/**
+ * Get the current column id. After all tree writers have been created, this count equals the
+ * total number of columns (including columns nested within complex types).
+ * @return current column id
+ */
+ public int getCurrentColumnId() {
+ return columnCount;
+ }
+
+ /**
* Get the stride rate of the row index.
*/
public int getRowIndexStride() {
@@ -538,6 +578,22 @@ public CompressionStrategy getCompressionStrategy() {
}
/**
+ * Get the bloom filter columns
+ * @return bloom filter columns
+ */
+ public boolean[] getBloomFilterColumns() {
+ return bloomFilterColumns;
+ }
+
+ /**
+ * Get the bloom filter false positive probability.
+ * @return fpp
+ */
+ public double getBloomFilterFPP() {
+ return bloomFilterFpp;
+ }
+
+ /**
* Get the writer's configuration.
* @return configuration
*/
@@ -572,6 +628,11 @@ public Configuration getConfiguration() {
private final OrcProto.RowIndex.Builder rowIndex;
private final OrcProto.RowIndexEntry.Builder rowIndexEntry;
private final PositionedOutputStream rowIndexStream;
+ private final PositionedOutputStream bloomFilterStream;
+ protected final BloomFilter bloomFilter;
+ protected final boolean createBloomFilter;
+ private final OrcProto.BloomFilterIndex.Builder bloomFilterIndex;
+ private final OrcProto.BloomFilter.Builder bloomFilterEntry;
private boolean foundNulls;
private OutStream isPresentOutStream;
private final List<OrcProto.StripeStatistics.Builder> stripeStatsBuilders;
@@ -598,6 +659,7 @@ public Configuration getConfiguration() {
isPresent = null;
}
this.foundNulls = false;
+ createBloomFilter = streamFactory.getBloomFilterColumns()[columnId];
indexStatistics = ColumnStatisticsImpl.create(inspector);
stripeColStatistics = ColumnStatisticsImpl.create(inspector);
fileStatistics = ColumnStatisticsImpl.create(inspector);
@@ -607,11 +669,22 @@ public Configuration getConfiguration() {
rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
stripeStatsBuilders = Lists.newArrayList();
if (streamFactory.buildIndex()) {
- rowIndexStream = streamFactory.createStream(id,
- OrcProto.Stream.Kind.ROW_INDEX);
+ rowIndexStream = streamFactory.createStream(id, OrcProto.Stream.Kind.ROW_INDEX);
} else {
rowIndexStream = null;
}
+ if (createBloomFilter) {
+ bloomFilterEntry = OrcProto.BloomFilter.newBuilder();
+ bloomFilterIndex = OrcProto.BloomFilterIndex.newBuilder();
+ bloomFilterStream = streamFactory.createStream(id, OrcProto.Stream.Kind.BLOOM_FILTER);
+ bloomFilter = new BloomFilter(streamFactory.getRowIndexStride(),
+ streamFactory.getBloomFilterFPP());
+ } else {
+ bloomFilterEntry = null;
+ bloomFilterIndex = null;
+ bloomFilterStream = null;
+ bloomFilter = null;
+ }
}
protected OrcProto.RowIndex.Builder getRowIndex() {
@@ -725,6 +798,14 @@ void writeStripe(OrcProto.StripeFooter.Builder builder,
}
rowIndex.clear();
rowIndexEntry.clear();
+
+ // write the bloom filter to out stream
+ if (bloomFilterStream != null) {
+ bloomFilterIndex.build().writeTo(bloomFilterStream);
+ bloomFilterStream.flush();
+ bloomFilterIndex.clear();
+ bloomFilterEntry.clear();
+ }
}
private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder,
@@ -763,12 +844,23 @@ void createRowIndexEntry() throws IOException {
indexStatistics.reset();
rowIndex.addEntry(rowIndexEntry);
rowIndexEntry.clear();
+ addBloomFilterEntry();
recordPosition(rowIndexPosition);
for(TreeWriter child: childrenWriters) {
child.createRowIndexEntry();
}
}
+ void addBloomFilterEntry() {
+ if (createBloomFilter) {
+ bloomFilterEntry.setNumHashFunctions(bloomFilter.getNumHashFunctions());
+ bloomFilterEntry.addAllBitset(Longs.asList(bloomFilter.getBitSet()));
+ bloomFilterIndex.addBloomFilter(bloomFilterEntry.build());
+ bloomFilter.reset();
+ bloomFilterEntry.clear();
+ }
+ }
+
/**
* Record the current position in each of this column's streams.
* @param recorder where should the locations be recorded
@@ -851,6 +943,9 @@ void write(Object obj) throws IOException {
if (obj != null) {
byte val = ((ByteObjectInspector) inspector).get(obj);
indexStatistics.updateInteger(val);
+ if (createBloomFilter) {
+ bloomFilter.addLong(val);
+ }
writer.write(val);
}
}
@@ -926,6 +1021,10 @@ void write(Object obj) throws IOException {
val = shortInspector.get(obj);
}
indexStatistics.updateInteger(val);
+ if (createBloomFilter) {
+ // integers are converted to longs in column statistics and during SARG evaluation
+ bloomFilter.addLong(val);
+ }
writer.write(val);
}
}
@@ -966,6 +1065,10 @@ void write(Object obj) throws IOException {
if (obj != null) {
float val = ((FloatObjectInspector) inspector).get(obj);
indexStatistics.updateDouble(val);
+ if (createBloomFilter) {
+ // floats are converted to doubles in column statistics and during SARG evaluation
+ bloomFilter.addDouble(val);
+ }
utils.writeFloat(stream, val);
}
}
@@ -1006,6 +1109,9 @@ void write(Object obj) throws IOException {
if (obj != null) {
double val = ((DoubleObjectInspector) inspector).get(obj);
indexStatistics.updateDouble(val);
+ if (createBloomFilter) {
+ bloomFilter.addDouble(val);
+ }
utils.writeDouble(stream, val);
}
}
@@ -1099,6 +1205,9 @@ void write(Object obj) throws IOException {
directLengthOutput.write(val.getLength());
}
indexStatistics.updateString(val);
+ if (createBloomFilter) {
+ bloomFilter.addBytes(val.getBytes(), val.getLength());
+ }
}
}
@@ -1258,6 +1367,7 @@ void createRowIndexEntry() throws IOException {
OrcProto.RowIndexEntry base = rowIndexEntry.build();
savedRowIndex.add(base);
rowIndexEntry.clear();
+ addBloomFilterEntry();
recordPosition(rowIndexPosition);
rowIndexValueCount.add(Long.valueOf(rows.size()));
if (strideDictionaryCheck) {
@@ -1368,6 +1478,9 @@ void write(Object obj) throws IOException {
stream.write(val.getBytes(), 0, val.getLength());
length.write(val.getLength());
indexStatistics.updateBinary(val);
+ if (createBloomFilter) {
+ bloomFilter.addBytes(val.getBytes(), val.getLength());
+ }
}
}
@@ -1430,6 +1543,9 @@ void write(Object obj) throws IOException {
indexStatistics.updateTimestamp(val);
seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP);
nanos.write(formatNanos(val.getNanos()));
+ if (createBloomFilter) {
+ bloomFilter.addLong(val.getTime());
+ }
}
}
@@ -1490,6 +1606,9 @@ void write(Object obj) throws IOException {
DateWritable val = ((DateObjectInspector) inspector).getPrimitiveWritableObject(obj);
indexStatistics.updateDate(val);
writer.write(val.getDays());
+ if (createBloomFilter) {
+ bloomFilter.addLong(val.getDays());
+ }
}
}
@@ -1558,6 +1677,9 @@ void write(Object obj) throws IOException {
decimal.unscaledValue());
scaleStream.write(decimal.scale());
indexStatistics.updateDecimal(decimal);
+ if (createBloomFilter) {
+ bloomFilter.addString(decimal.toString());
+ }
}
}
@@ -1657,6 +1779,9 @@ void write(Object obj) throws IOException {
ListObjectInspector insp = (ListObjectInspector) inspector;
int len = insp.getListLength(obj);
lengths.write(len);
+ if (createBloomFilter) {
+ bloomFilter.addLong(len);
+ }
for(int i=0; i < len; ++i) {
childrenWriters[0].write(insp.getListElement(obj, i));
}
@@ -1721,6 +1846,9 @@ void write(Object obj) throws IOException {
// accessor in the MapObjectInspector.
Map<?, ?> valueMap = insp.getMap(obj);
lengths.write(valueMap.size());
+ if (createBloomFilter) {
+ bloomFilter.addLong(valueMap.size());
+ }
for(Map.Entry<?, ?> entry: valueMap.entrySet()) {
childrenWriters[0].write(entry.getKey());
childrenWriters[1].write(entry.getValue());
@@ -1773,6 +1901,9 @@ void write(Object obj) throws IOException {
UnionObjectInspector insp = (UnionObjectInspector) inspector;
byte tag = insp.getTag(obj);
tags.write(tag);
+ if (createBloomFilter) {
+ bloomFilter.addLong(tag);
+ }
childrenWriters[tag].write(insp.getField(obj));
}
}
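Sketch of what addBloomFilterEntry() above does once per row-index stride, using only calls visible in this patch (getNumHashFunctions(), getBitSet(), reset(), and the new proto builder):

```java
import com.google.common.primitives.Longs;

import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class BloomFilterEntryExample {
  // Snapshots the filter's state into a proto entry and resets it for the next row group.
  static OrcProto.BloomFilter snapshot(BloomFilter bloomFilter) {
    OrcProto.BloomFilter entry = OrcProto.BloomFilter.newBuilder()
        .setNumHashFunctions(bloomFilter.getNumHashFunctions())
        .addAllBitset(Longs.asList(bloomFilter.getBitSet()))
        .build();
    bloomFilter.reset();
    return entry;
  }
}
```

The entries accumulate in a BloomFilterIndex builder and are flushed to the column's BLOOM_FILTER stream when the stripe is written (see writeStripe above).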
diff --git ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
index 98459fb..14a32e8 100644
--- ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
+++ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
@@ -86,6 +86,15 @@ message RowIndex {
repeated RowIndexEntry entry = 1;
}
+message BloomFilter {
+ optional uint32 numHashFunctions = 1;
+ repeated fixed64 bitset = 2;
+}
+
+message BloomFilterIndex {
+ repeated BloomFilter bloomFilter = 1;
+}
+
message Stream {
// if you add new index stream kinds, you need to make sure to update
// StreamName to ensure it is added to the stripe in the right area
@@ -97,6 +106,7 @@ message Stream {
DICTIONARY_COUNT = 4;
SECONDARY = 5;
ROW_INDEX = 6;
+ BLOOM_FILTER = 7;
}
required Kind kind = 1;
optional uint32 column = 2;
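On the read side, the patch parses a BloomFilterIndex from the BLOOM_FILTER stream, one BloomFilter message per row group, and wraps it back into the in-memory form via the `new BloomFilter(OrcProto.BloomFilter)` constructor used in evaluatePredicateProto above. A minimal sketch:

```java
import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class BloomFilterReadExample {
  // Restores the row group's serialized filter and probes it for a string value.
  static boolean mightContain(OrcProto.BloomFilterIndex index, int rowGroup, String value) {
    BloomFilter bf = new BloomFilter(index.getBloomFilter(rowGroup));
    return bf.testString(value); // no false negatives; false positives at the configured fpp
  }
}
```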
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestBloomFilter.java ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestBloomFilter.java
new file mode 100644
index 0000000..32b95ab
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestBloomFilter.java
@@ -0,0 +1,458 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.filters;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Random;
+
+import org.junit.Test;
+
+/**
+ * Tests for the ORC BloomFilter implementation.
+ */
+public class TestBloomFilter {
+ private static final int COUNT = 100;
+ Random rand = new Random(123);
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg1() {
+ BloomFilter bf = new BloomFilter(0, 0);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg2() {
+ BloomFilter bf = new BloomFilter(0, 0.1);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg3() {
+ BloomFilter bf = new BloomFilter(1, 0.0);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg4() {
+ BloomFilter bf = new BloomFilter(1, 1.0);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg5() {
+ BloomFilter bf = new BloomFilter(-1, -1);
+ }
+
+ @Test
+ public void testBloomNumBits() {
+ assertEquals(0, BloomFilter.optimalNumOfBits(0, 0));
+ assertEquals(0, BloomFilter.optimalNumOfBits(0, 1));
+ assertEquals(0, BloomFilter.optimalNumOfBits(1, 1));
+ assertEquals(7, BloomFilter.optimalNumOfBits(1, 0.03));
+ assertEquals(72, BloomFilter.optimalNumOfBits(10, 0.03));
+ assertEquals(729, BloomFilter.optimalNumOfBits(100, 0.03));
+ assertEquals(7298, BloomFilter.optimalNumOfBits(1000, 0.03));
+ assertEquals(72984, BloomFilter.optimalNumOfBits(10000, 0.03));
+ assertEquals(729844, BloomFilter.optimalNumOfBits(100000, 0.03));
+ assertEquals(7298440, BloomFilter.optimalNumOfBits(1000000, 0.03));
+ assertEquals(6235224, BloomFilter.optimalNumOfBits(1000000, 0.05));
+ }
+
+ @Test
+ public void testBloomNumHashFunctions() {
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(-1, -1));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(0, 0));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 0));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 10));
+ assertEquals(7, BloomFilter.optimalNumOfHashFunctions(10, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10000, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100000, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000000, 100));
+ }
+
+ @Test
+ public void testBloomFilterBytes() {
+ BloomFilter bf = new BloomFilter(10000);
+ byte[] val = new byte[]{1, 2, 3};
+ byte[] val1 = new byte[]{1, 2, 3, 4};
+ byte[] val2 = new byte[]{1, 2, 3, 4, 5};
+ byte[] val3 = new byte[]{1, 2, 3, 4, 5, 6};
+
+ assertEquals(false, bf.test(val));
+ assertEquals(false, bf.test(val1));
+ assertEquals(false, bf.test(val2));
+ assertEquals(false, bf.test(val3));
+ bf.add(val);
+ assertEquals(true, bf.test(val));
+ assertEquals(false, bf.test(val1));
+ assertEquals(false, bf.test(val2));
+ assertEquals(false, bf.test(val3));
+ bf.add(val1);
+ assertEquals(true, bf.test(val));
+ assertEquals(true, bf.test(val1));
+ assertEquals(false, bf.test(val2));
+ assertEquals(false, bf.test(val3));
+ bf.add(val2);
+ assertEquals(true, bf.test(val));
+ assertEquals(true, bf.test(val1));
+ assertEquals(true, bf.test(val2));
+ assertEquals(false, bf.test(val3));
+ bf.add(val3);
+ assertEquals(true, bf.test(val));
+ assertEquals(true, bf.test(val1));
+ assertEquals(true, bf.test(val2));
+ assertEquals(true, bf.test(val3));
+
+ byte[] randVal = new byte[COUNT];
+ for (int i = 0; i < COUNT; i++) {
+ rand.nextBytes(randVal);
+ bf.add(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.test(randVal));
+ // most likely this value should not exist
+ randVal[0] = 0;
+ randVal[1] = 0;
+ randVal[2] = 0;
+ randVal[3] = 0;
+ randVal[4] = 0;
+ assertEquals(false, bf.test(randVal));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterByte() {
+ BloomFilter bf = new BloomFilter(10000);
+ byte val = Byte.MIN_VALUE;
+ byte val1 = 1;
+ byte val2 = 2;
+ byte val3 = Byte.MAX_VALUE;
+
+ assertEquals(false, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val1);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val2);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val3);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(true, bf.testLong(val3));
+
+ byte randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = (byte) rand.nextInt(Byte.MAX_VALUE);
+ bf.addLong(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testLong(randVal));
+ // most likely this value should not exist
+ assertEquals(false, bf.testLong((byte) -120));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterInt() {
+ BloomFilter bf = new BloomFilter(10000);
+ int val = Integer.MIN_VALUE;
+ int val1 = 1;
+ int val2 = 2;
+ int val3 = Integer.MAX_VALUE;
+
+ assertEquals(false, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val1);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val2);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val3);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(true, bf.testLong(val3));
+
+ int randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextInt();
+ bf.addLong(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testLong(randVal));
+ // most likely this value should not exist
+ assertEquals(false, bf.testLong(-120));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterLong() {
+ BloomFilter bf = new BloomFilter(10000);
+ long val = Long.MIN_VALUE;
+ long val1 = 1;
+ long val2 = 2;
+ long val3 = Long.MAX_VALUE;
+
+ assertEquals(false, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val1);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val2);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val3);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(true, bf.testLong(val3));
+
+ long randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextLong();
+ bf.addLong(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testLong(randVal));
+ // most likely this value should not exist
+ assertEquals(false, bf.testLong(-120));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterFloat() {
+ BloomFilter bf = new BloomFilter(10000);
+ float val = Float.MIN_VALUE;
+ float val1 = 1.1f;
+ float val2 = 2.2f;
+ float val3 = Float.MAX_VALUE;
+
+ assertEquals(false, bf.testDouble(val));
+ assertEquals(false, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(false, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val1);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val2);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(true, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val3);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(true, bf.testDouble(val2));
+ assertEquals(true, bf.testDouble(val3));
+
+ float randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextFloat();
+ bf.addDouble(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testDouble(randVal));
+ // this value was never added, so it should almost certainly be absent
+ assertEquals(false, bf.testDouble(-120.2f));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterDouble() {
+ BloomFilter bf = new BloomFilter(10000);
+ double val = Double.MIN_VALUE;
+ double val1 = 1.1d;
+ double val2 = 2.2d;
+ double val3 = Double.MAX_VALUE;
+
+ assertEquals(false, bf.testDouble(val));
+ assertEquals(false, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(false, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val1);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val2);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(true, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val3);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(true, bf.testDouble(val2));
+ assertEquals(true, bf.testDouble(val3));
+
+ double randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextDouble();
+ bf.addDouble(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testDouble(randVal));
+ // this value was never added, so it should almost certainly be absent
+ assertEquals(false, bf.testDouble(-120.2d));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterString() {
+ BloomFilter bf = new BloomFilter(100000);
+ String val = "bloo";
+ String val1 = "bloom fil";
+ String val2 = "bloom filter";
+ String val3 = "cuckoo filter";
+
+ assertEquals(false, bf.testString(val));
+ assertEquals(false, bf.testString(val1));
+ assertEquals(false, bf.testString(val2));
+ assertEquals(false, bf.testString(val3));
+ bf.addString(val);
+ assertEquals(true, bf.testString(val));
+ assertEquals(false, bf.testString(val1));
+ assertEquals(false, bf.testString(val2));
+ assertEquals(false, bf.testString(val3));
+ bf.addString(val1);
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(false, bf.testString(val2));
+ assertEquals(false, bf.testString(val3));
+ bf.addString(val2);
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(true, bf.testString(val2));
+ assertEquals(false, bf.testString(val3));
+ bf.addString(val3);
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(true, bf.testString(val2));
+ assertEquals(true, bf.testString(val3));
+
+ long randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextLong();
+ bf.addString(Long.toString(randVal));
+ }
+ // last value should be present
+ assertEquals(true, bf.testString(Long.toString(randVal)));
+ // this value was never added, so it should almost certainly be absent
+ assertEquals(false, bf.testString(Long.toString(-120)));
+
+ assertEquals(77944, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testMerge() {
+ BloomFilter bf = new BloomFilter(10000);
+ String val = "bloo";
+ String val1 = "bloom fil";
+ String val2 = "bloom filter";
+ String val3 = "cuckoo filter";
+ bf.addString(val);
+ bf.addString(val1);
+ bf.addString(val2);
+ bf.addString(val3);
+
+ BloomFilter bf2 = new BloomFilter(10000);
+ String v = "2_bloo";
+ String v1 = "2_bloom fil";
+ String v2 = "2_bloom filter";
+ String v3 = "2_cuckoo filter";
+ bf2.addString(v);
+ bf2.addString(v1);
+ bf2.addString(v2);
+ bf2.addString(v3);
+
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(true, bf.testString(val2));
+ assertEquals(true, bf.testString(val3));
+ assertEquals(false, bf.testString(v));
+ assertEquals(false, bf.testString(v1));
+ assertEquals(false, bf.testString(v2));
+ assertEquals(false, bf.testString(v3));
+
+ bf.merge(bf2);
+
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(true, bf.testString(val2));
+ assertEquals(true, bf.testString(val3));
+ assertEquals(true, bf.testString(v));
+ assertEquals(true, bf.testString(v1));
+ assertEquals(true, bf.testString(v2));
+ assertEquals(true, bf.testString(v3));
+ }
+}
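
A note on the sizeInBytes() expectations above: 7800 bytes for 10,000 expected entries and 77,944 bytes for 100,000 follow from the standard optimal-size formula m = -n*ln(p)/(ln 2)^2, assuming the default false-positive probability is 0.05 and the bit set is stored in 64-bit words. A minimal sketch that reproduces both constants (the class and method names here are illustrative, not part of the patch):

// Sketch: optimal bloom filter size in bits, rounded up to whole longs.
// Assumes p = 0.05 is the filter's default false-positive probability.
public class BloomFilterSizingSketch {
  static long sizeInBytes(long expectedEntries, double fpp) {
    long numBits = (long) Math.ceil(
        -expectedEntries * Math.log(fpp) / (Math.log(2) * Math.log(2)));
    long numWords = (numBits + 63) / 64;  // round up to whole 64-bit words
    return numWords * 8;                  // 8 bytes per long
  }

  public static void main(String[] args) {
    System.out.println(sizeInBytes(10000, 0.05));   // 7800
    System.out.println(sizeInBytes(100000, 0.05));  // 77944
  }
}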
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestMurmur3.java ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestMurmur3.java
new file mode 100644
index 0000000..d92a3ce
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestMurmur3.java
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.filters;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Random;
+
+/**
+ * Tests for Murmur3 variants.
+ */
+public class TestMurmur3 {
+
+ @Test
+ public void testHashCodesM3_32_string() {
+ String key = "test";
+ int seed = 123;
+ HashFunction hf = Hashing.murmur3_32(seed);
+ int hc1 = hf.hashBytes(key.getBytes()).asInt();
+ int hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed);
+ assertEquals(hc1, hc2);
+
+ key = "testkey";
+ hc1 = hf.hashBytes(key.getBytes()).asInt();
+ hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed);
+ assertEquals(hc1, hc2);
+ }
+
+ @Test
+ public void testHashCodesM3_32_ints() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_32(seed);
+ for (int i = 0; i < 1000; i++) {
+ int val = rand.nextInt();
+ byte[] data = ByteBuffer.allocate(4).putInt(val).array();
+ int hc1 = hf.hashBytes(data).asInt();
+ int hc2 = Murmur3.hash32(data, data.length, seed);
+ assertEquals(hc1, hc2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_32_longs() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_32(seed);
+ for (int i = 0; i < 1000; i++) {
+ long val = rand.nextLong();
+ byte[] data = ByteBuffer.allocate(8).putLong(val).array();
+ int hc1 = hf.hashBytes(data).asInt();
+ int hc2 = Murmur3.hash32(data, data.length, seed);
+ assertEquals(hc1, hc2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_32_double() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_32(seed);
+ for (int i = 0; i < 1000; i++) {
+ double val = rand.nextDouble();
+ byte[] data = ByteBuffer.allocate(8).putDouble(val).array();
+ int hc1 = hf.hashBytes(data).asInt();
+ int hc2 = Murmur3.hash32(data, data.length, seed);
+ assertEquals(hc1, hc2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_128_string() {
+ String key = "test";
+ int seed = 123;
+ HashFunction hf = Hashing.murmur3_128(seed);
+ // Guava stores the hash bytes in little-endian order
+ ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(key.getBytes()).asBytes());
+ buf.flip();
+ long gl1 = buf.getLong();
+ long gl2 = buf.getLong(8);
+ long[] hc = Murmur3.hash128(key.getBytes(), key.getBytes().length, seed);
+ long m1 = hc[0];
+ long m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+
+ key = "testkey128_testkey128";
+ buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(key.getBytes()).asBytes());
+ buf.flip();
+ gl1 = buf.getLong();
+ gl2 = buf.getLong(8);
+ hc = Murmur3.hash128(key.getBytes(), key.getBytes().length, seed);
+ m1 = hc[0];
+ m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+ }
+
+ @Test
+ public void testHashCodesM3_128_ints() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_128(seed);
+ for (int i = 0; i < 1000; i++) {
+ int val = rand.nextInt();
+ byte[] data = ByteBuffer.allocate(4).putInt(val).array();
+ // Guava stores the hash bytes in little-endian order
+ ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(data).asBytes());
+ buf.flip();
+ long gl1 = buf.getLong();
+ long gl2 = buf.getLong(8);
+ long[] hc = Murmur3.hash128(data, data.length, seed);
+ long m1 = hc[0];
+ long m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_128_longs() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_128(seed);
+ for (int i = 0; i < 1000; i++) {
+ long val = rand.nextLong();
+ byte[] data = ByteBuffer.allocate(8).putLong(val).array();
+ // Guava stores the hash bytes in little-endian order
+ ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(data).asBytes());
+ buf.flip();
+ long gl1 = buf.getLong();
+ long gl2 = buf.getLong(8);
+ long[] hc = Murmur3.hash128(data, data.length, seed);
+ long m1 = hc[0];
+ long m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_128_double() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_128(seed);
+ for (int i = 0; i < 1000; i++) {
+ double val = rand.nextDouble();
+ byte[] data = ByteBuffer.allocate(8).putDouble(val).array();
+ // Guava stores the hash bytes in little-endian order
+ ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(data).asBytes());
+ buf.flip();
+ long gl1 = buf.getLong();
+ long gl2 = buf.getLong(8);
+ long[] hc = Murmur3.hash128(data, data.length, seed);
+ long m1 = hc[0];
+ long m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+ }
+ }
+}
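
The 128-bit assertions above rest on one Guava detail: HashCode.asBytes() returns the hash in little-endian byte order, so the tests rebuild the two 64-bit halves through a LITTLE_ENDIAN ByteBuffer before comparing them with the long[2] returned by Murmur3.hash128(). For the low half alone, HashCode.asLong(), which is documented to read the first eight bytes little-endian, gives a shorter equivalent; a sketch, assuming an extra import of com.google.common.hash.HashCode:

  @Test
  public void testLow64ViaAsLong() {
    int seed = 123;
    byte[] data = "test".getBytes();
    // asLong() yields the first 8 hash bytes as a little-endian long,
    // which should equal hash128()[0] for the same input and seed.
    HashCode code = Hashing.murmur3_128(seed).hashBytes(data);
    assertEquals(code.asLong(), Murmur3.hash128(data, data.length, seed)[0]);
  }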
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
index e451143..00afdac 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
@@ -21,19 +21,6 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hive.common.util.HiveTestUtils;
-import org.junit.Before;
-import org.junit.Test;
-
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
@@ -49,6 +36,19 @@
import java.util.Map;
import java.util.Random;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.Before;
+import org.junit.Test;
+
public class TestFileDump {
Path workDir = new Path(System.getProperty("test.tmp.dir"));
@@ -303,4 +303,101 @@ public void testDictionaryThreshold() throws Exception {
checkOutput(outputFilename, workDir + File.separator + outputFilename);
}
+
+ @Test
+ public void testBloomFilter() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
+ OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+ .fileSystem(fs)
+ .inspector(inspector)
+ .stripeSize(100000)
+ .compress(CompressionKind.ZLIB)
+ .bufferSize(10000)
+ .rowIndexStride(1000)
+ .bloomFilterColumns("s");
+ Writer writer = OrcFile.createWriter(testFilePath, options);
+ Random r1 = new Random(1);
+ String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+ "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+ "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+ "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+ "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+ "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+ "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+ "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+ "before", "us,", "we", "were", "all", "going", "direct", "to",
+ "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+ "way"};
+ for(int i=0; i < 21000; ++i) {
+ writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
+ words[r1.nextInt(words.length)]));
+ }
+ writer.close();
+ PrintStream origOut = System.out;
+ String outputFilename = "orc-file-dump-bloomfilter.out";
+ FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"});
+ System.out.flush();
+ System.setOut(origOut);
+
+ checkOutput(outputFilename, workDir + File.separator + outputFilename);
+ }
+
+ @Test
+ public void testBloomFilter2() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
+ OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+ .fileSystem(fs)
+ .inspector(inspector)
+ .stripeSize(100000)
+ .compress(CompressionKind.ZLIB)
+ .bufferSize(10000)
+ .rowIndexStride(1000)
+ .bloomFilterColumns("l")
+ .bloomFilterFpp(0.01);
+ Writer writer = OrcFile.createWriter(testFilePath, options);
+ Random r1 = new Random(1);
+ String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+ "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+ "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+ "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+ "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+ "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+ "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+ "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+ "before", "us,", "we", "were", "all", "going", "direct", "to",
+ "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+ "way"};
+ for(int i=0; i < 21000; ++i) {
+ writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
+ words[r1.nextInt(words.length)]));
+ }
+ writer.close();
+ PrintStream origOut = System.out;
+ String outputFilename = "orc-file-dump-bloomfilter2.out";
+ FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
+ System.out.flush();
+ System.setOut(origOut);
+
+ checkOutput(outputFilename, workDir + File.separator + outputFilename);
+ }
}
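
The two dump tests differ only in coverage and precision: the first attaches a bloom filter to the string column "s" at the default false-positive probability, the second to the long column "l" with the probability tightened to 0.01, and each points FileDump's --rowindex flag at the corresponding column id so that the dumped row-group index, bloom filter details included, can be diffed against the golden output file. Tightening the probability has a size cost; reusing the sizing sketch from the bloom filter tests above (illustrative figures from the formula, not measured from the dumps):

// Approximate per-stride filter size for the 1000-row index stride used above.
System.out.println(sizeInBytes(1000, 0.05)); // 784  bytes at the default fpp
System.out.println(sizeInBytes(1000, 0.01)); // 1200 bytes at fpp = 0.01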
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 5c48d0b..cd1d645 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -515,6 +515,10 @@ public void testTimestamp() throws Exception {
Object row = rows.next(null);
assertEquals(tslist.get(idx++).getNanos(), ((TimestampWritable) row).getNanos());
}
+ assertEquals(1, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false};
+ boolean[] included = OrcUtils.includeColumns("", "ts", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
}
@Test
@@ -538,6 +542,19 @@ public void testStringAndBinaryStatistics() throws Exception {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
+ assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false, false, true};
+ boolean[] included = OrcUtils.includeColumns("string1", "bytes1,string1", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, false, false};
+ included = OrcUtils.includeColumns("", "bytes1,string1", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, false, false};
+ included = OrcUtils.includeColumns(null, "bytes1,string1", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(4, stats[0].getNumberOfValues());
@@ -634,6 +651,12 @@ public void testStripeLevelStats() throws Exception {
writer.close();
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
+
+ assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false, true, false};
+ boolean[] included = OrcUtils.includeColumns("int1", "int1,string1", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
Metadata metadata = reader.getMetadata();
int numStripes = metadata.getStripeStatistics().size();
assertEquals(3, numStripes);
@@ -672,7 +695,7 @@ public void testStripeLevelStats() throws Exception {
assertEquals(5000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum());
RecordReaderImpl recordReader = (RecordReaderImpl) reader.rows();
- OrcProto.RowIndex[] index = recordReader.readRowIndex(0);
+ OrcProto.RowIndex[] index = recordReader.readRowIndex(0, null).getRowGroupIndex();
assertEquals(3, index.length);
List items = index[1].getEntryList();
assertEquals(1, items.size());
@@ -682,7 +705,7 @@ public void testStripeLevelStats() throws Exception {
assertEquals(0, items.get(0).getPositions(2));
assertEquals(1,
items.get(0).getStatistics().getIntStatistics().getMinimum());
- index = recordReader.readRowIndex(1);
+ index = recordReader.readRowIndex(1, null).getRowGroupIndex();
assertEquals(3, index.length);
items = index[1].getEntryList();
assertEquals(2,
@@ -715,6 +738,44 @@ public void test1() throws Exception {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
+ assertEquals(24, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false, false, false, false, false,
+ false, false, false, false, false,
+ false, false, false, false, false,
+ false, false, false, false, false,
+ false, false, false, false};
+ boolean[] included = OrcUtils.includeColumns("",
+ "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, true, false, false, false,
+ false, false, false, false, true,
+ true, true, true, true, true,
+ false, false, false, false, true,
+ true, true, true, true};
+ included = OrcUtils.includeColumns("boolean1,string1,middle,map",
+ "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, true, true, true, true,
+ true, true, true, true, true,
+ true, true, true, true, true,
+ true, true, true, true, true,
+ true, true, true, true};
+ included = OrcUtils.includeColumns(
+ "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map",
+ "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
Metadata metadata = reader.getMetadata();
// check the stats
@@ -1183,6 +1244,20 @@ public void testUnionAndTimestamp() throws Exception {
writer.close();
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
+
+ assertEquals(6, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false, false, false, false, false, false};
+ boolean[] included = OrcUtils.includeColumns("", "time,union,decimal", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, true, false, false, false, true};
+ included = OrcUtils.includeColumns("time,decimal", "time,union,decimal", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, false, true, true, true, false};
+ included = OrcUtils.includeColumns("union", "time,union,decimal", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
assertEquals(5309, reader.getNumberOfRows());
DecimalColumnStatistics stats =
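
A reading aid for the includeColumns() assertions in this file: the boolean arrays are indexed by flattened column id, id 0 is the root struct (note that it stays false even when every column is selected), and compound types contribute one id per nested node. The 24-slot layout below is inferred from test1's expected arrays, assuming preorder assignment over the type tree; it is illustrative, not taken from OrcUtils:

// Inferred flattened column ids for test1 (assumption: preorder walk):
//  0 root struct    5 long1      10-14 middle: struct, list, inner struct, int1, string1
//  1 boolean1       6 float1     15-18 list: list, inner struct, int1, string1
//  2 byte1          7 double1    19-23 map: map, key string, inner struct, int1, string1
//  3 short1         8 bytes1
//  4 int1           9 string1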
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
index a86d19f..326dde4 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
@@ -23,6 +23,12 @@
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
@@ -31,6 +37,7 @@
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.Location;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -41,11 +48,6 @@
import org.mockito.MockSettings;
import org.mockito.Mockito;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
-
public class TestRecordReaderImpl {
// can add .verboseLogging() to cause Mockito to log invocations
@@ -287,6 +289,13 @@ public void testGetMin() throws Exception {
return OrcProto.ColumnStatistics.newBuilder().setDateStatistics(dateStats.build()).build();
}
+ private static OrcProto.ColumnStatistics createTimestampStats(int min, int max) {
+ OrcProto.TimestampStatistics.Builder tsStats = OrcProto.TimestampStatistics.newBuilder();
+ tsStats.setMinimum(min);
+ tsStats.setMaximum(max);
+ return OrcProto.ColumnStatistics.newBuilder().setTimestampStatistics(tsStats.build()).build();
+ }
+
private static OrcProto.ColumnStatistics createDecimalStats(String min, String max) {
OrcProto.DecimalStatistics.Builder decStats = OrcProto.DecimalStatistics.newBuilder();
decStats.setMinimum(min);
@@ -294,6 +303,15 @@ public void testGetMin() throws Exception {
return OrcProto.ColumnStatistics.newBuilder().setDecimalStatistics(decStats.build()).build();
}
+ private static OrcProto.ColumnStatistics createDecimalStats(String min, String max,
+ boolean hasNull) {
+ OrcProto.DecimalStatistics.Builder decStats = OrcProto.DecimalStatistics.newBuilder();
+ decStats.setMinimum(min);
+ decStats.setMaximum(max);
+ return OrcProto.ColumnStatistics.newBuilder().setDecimalStatistics(decStats.build())
+ .setHasNull(hasNull).build();
+ }
+
@Test
public void testGetMax() throws Exception {
assertEquals(100L, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L))));
@@ -320,27 +338,27 @@ public void testPredEvalWithIntStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
}
@Test
@@ -348,27 +366,27 @@ public void testPredEvalWithDoubleStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
}
@Test
@@ -376,27 +394,27 @@ public void testPredEvalWithStringStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 100, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 100.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "100", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(100), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(100), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
}
@Test
@@ -404,57 +422,57 @@ public void testPredEvalWithDateStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "1970-01-11", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15.1", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "__a15__1", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "2000-01-16", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "1970-01-16", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(150), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
}
@@ -463,27 +481,27 @@ public void testPredEvalWithDecimalStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
}
@@ -493,17 +511,17 @@ public void testEquals() throws Exception {
(PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null));
}
@Test
@@ -512,17 +530,17 @@ public void testNullSafeEquals() throws Exception {
(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null));
}
@Test
@@ -531,15 +549,15 @@ public void testLessThan() throws Exception {
(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), lessThan));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), lessThan));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), lessThan, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), lessThan));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), lessThan, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), lessThan));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), lessThan, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), lessThan));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), lessThan, null));
}
@Test
@@ -548,15 +566,15 @@ public void testLessThanEquals() throws Exception {
(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
}
@Test
@@ -568,13 +586,13 @@ public void testIn() throws Exception {
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
"x", null, args);
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 20L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(30L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null));
}
@Test
@@ -586,19 +604,19 @@ public void testBetween() throws Exception {
(PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.INTEGER,
"x", null, args);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 5L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(30L, 40L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 40L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(5L, 15L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 15L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 25L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 25L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(5L, 25L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 25L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 20L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 20L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null));
}
@Test
@@ -607,7 +625,7 @@ public void testIsNull() throws Exception {
(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.INTEGER,
"x", null, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
}
@@ -617,17 +635,17 @@ public void testEqualsWithNullInStats() throws Exception {
(PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
}
@Test
@@ -636,17 +654,17 @@ public void testNullSafeEqualsWithNullInStats() throws Exception {
(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
}
@Test
@@ -655,17 +673,17 @@ public void testLessThanWithNullInStats() throws Exception {
(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.NO_NULL, // min, same stats
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null));
}
@Test
@@ -674,17 +692,17 @@ public void testLessThanEqualsWithNullInStats() throws Exception {
(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
}
@Test
@@ -696,17 +714,17 @@ public void testInWithNullInStats() throws Exception {
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
"x", null, args);
assertEquals(TruthValue.NO_NULL, // before & after
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
}
@Test
@@ -718,31 +736,31 @@ public void testBetweenWithNullInStats() throws Exception {
(PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING,
"x", null, args);
assertEquals(TruthValue.YES_NULL, // before & after
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null));
assertEquals(TruthValue.YES_NULL, // before & max
- RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null));
assertEquals(TruthValue.NO_NULL, // before & before
- RecordReaderImpl.evaluatePredicate(createStringStats("h", "g", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("h", "g", true), pred, null));
assertEquals(TruthValue.YES_NO_NULL, // before & min
- RecordReaderImpl.evaluatePredicate(createStringStats("f", "g", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("f", "g", true), pred, null));
assertEquals(TruthValue.YES_NO_NULL, // before & middle
- RecordReaderImpl.evaluatePredicate(createStringStats("e", "g", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "g", true), pred, null));
assertEquals(TruthValue.YES_NULL, // min & after
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "e", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "e", true), pred, null));
assertEquals(TruthValue.YES_NULL, // min & max
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "f", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "f", true), pred, null));
assertEquals(TruthValue.YES_NO_NULL, // min & middle
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "g", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "g", true), pred, null));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "c", true), pred, null)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NULL, // min & after, same stats
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null));
}
@Test
@@ -751,9 +769,9 @@ public void testIsNullWithNullInStats() throws Exception {
(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING,
"x", null, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", false), pred));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", false), pred, null));
}
@Test
@@ -1131,4 +1149,350 @@ public void testPartialPlanString() throws Exception {
41000, 51000 + RecordReaderImpl.WORST_UNCOMPRESSED_SLOP,
51000, 95000, 95000, 97000, 97000, 100000)));
}
+
+ @Test
+ public void testIntNullSafeEqualsBloomFilter() throws Exception {
+ PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
+ PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
+ BloomFilter bf = new BloomFilter(10000);
+ for (int i = 20; i < 1000; i++) {
+ bf.addLong(i);
+ }
+ ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
+ assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+ bf.addLong(15);
+ assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+ }
+
+ @Test
+ public void testIntEqualsBloomFilter() throws Exception {
+ PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
+ PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
+ BloomFilter bf = new BloomFilter(10000);
+ for (int i = 20; i < 1000; i++) {
+ bf.addLong(i);
+ }
+ ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
+ assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+ bf.addLong(15);
+ assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+ }
+
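The bloom filter tests in this hunk exercise a narrowing rule worth spelling out: min/max stats of [10, 100] already yield YES_NO for the value 15, a filter that has never seen 15 strengthens that to a definite NO, and adding 15 only restores YES_NO, never YES, because bloom filters admit false positives. In sketch form (names as used in the surrounding tests):

// stats [10, 100], predicate x = 15:
//   stats alone           -> YES_NO  (15 lies inside the range)
//   bf without 15         -> NO      (a bloom filter can prove absence)
//   bf after addLong(15)  -> YES_NO  (presence is only probable, never certain)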
+ @Test
+ public void testIntInBloomFilter() throws Exception {
+ List