stripes = reader.getStripes();
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index b46937c..25bb15a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -18,16 +18,23 @@
package org.apache.hadoop.hive.ql.io.orc;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT;
+
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.*;
-
/**
* Contains factory methods to read or write ORC files.
*/
@@ -148,7 +155,9 @@ private WriterVersion(int id) {
ROW_INDEX_STRIDE("orc.row.index.stride"),
ENABLE_INDEXES("orc.create.index"),
BLOCK_PADDING("orc.block.padding"),
- ENCODING_STRATEGY("orc.encoding.strategy");
+ ENCODING_STRATEGY("orc.encoding.strategy"),
+ BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns"),
+ BLOOM_FILTER_FPP("orc.bloom.filter.fpp");
private final String propName;
@@ -256,6 +265,8 @@ public static Reader createReader(Path path,
private EncodingStrategy encodingStrategy;
private CompressionStrategy compressionStrategy;
private float paddingTolerance;
+ private String bloomFilterColumns;
+ private double bloomFilterFpp;
WriterOptions(Configuration conf) {
configuration = conf;
@@ -288,9 +299,9 @@ public static Reader createReader(Path path,
compressionStrategy = CompressionStrategy.valueOf(compString);
}
- paddingTolerance =
- conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname,
- HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal);
+ paddingTolerance = conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname,
+ HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal);
+ bloomFilterFpp = BloomFilter.DEFAULT_FPP;
}
/**
@@ -367,6 +378,24 @@ public WriterOptions paddingTolerance(float value) {
}
/**
+ * Comma separated values of column names for which bloom filter is to be created.
+ */
+ public WriterOptions bloomFilterColumns(String columns) {
+   // stored as given; returns this options object so calls can be chained
+   this.bloomFilterColumns = columns;
+   return this;
+ }
+
+ /**
+  * Sets the false positive probability to use for bloom filters.
+  * @param fpp - false positive probability
+  * @return this options object, so that calls can be chained
+  */
+ public WriterOptions bloomFilterFpp(double fpp) {
+   this.bloomFilterFpp = fpp;
+   return this;
+ }
+
+ /**
* Sets the generic compression that is used to compress the data.
*/
public WriterOptions compress(CompressionKind value) {
@@ -438,8 +467,8 @@ public static Writer createWriter(Path path,
opts.memoryManagerValue, opts.blockPaddingValue,
opts.versionValue, opts.callback,
opts.encodingStrategy, opts.compressionStrategy,
- opts.paddingTolerance,
- opts.blockSizeValue);
+ opts.paddingTolerance, opts.blockSizeValue,
+ opts.bloomFilterColumns, opts.bloomFilterFpp);
}
/**
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 8c44e3e..498ee14 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,9 +18,18 @@
package org.apache.hadoop.hive.ql.io.orc;
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -65,18 +74,9 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.TreeMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* A MapReduce/Hive input format for ORC files.
*
@@ -920,7 +920,7 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
// column statistics at index 0 contains only the number of rows
ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
- truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred));
+ truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
} else {
// parition column case.
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index 5bd3f0c..3a6faac 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -17,12 +17,17 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Properties;
+
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -41,11 +46,6 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Properties;
-
/**
* A Hive OutputFormat for ORC files.
*/
@@ -170,6 +170,16 @@ private String getSettingFromPropsFallingBackToConf(String key, Properties props
options.encodingStrategy(EncodingStrategy.valueOf(propVal));
}
+ if ((propVal = props.getProperty(
+ OrcFile.OrcTableProperties.BLOOM_FILTER_COLUMNS.getPropName())) != null) {
+ options.bloomFilterColumns(propVal);
+ }
+
+ if ((propVal = props.getProperty(
+ OrcFile.OrcTableProperties.BLOOM_FILTER_FPP.getPropName())) != null) {
+ options.bloomFilterFpp(Double.parseDouble(propVal));
+ }
+
return options;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
new file mode 100644
index 0000000..ba59b35
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Helpers for translating top-level column names into the flattened column ids
+ * obtained once complex types are expanded.
+ */
+public class OrcUtils {
+  private static final Log LOG = LogFactory.getLog(OrcUtils.class);
+
+  /**
+   * Returns selected columns as a boolean array with true value set for specified column names.
+   * The result will contain number of elements equal to flattened number of columns.
+   * For example:
+   * selectedColumns - a,b,c
+   * allColumns - a,b,c,d
+   * If column c is a complex type, say list and other types are primitives then result will
+   * be [false, true, true, true, true, false]
+   * Index 0 is the root element of the struct which is set to false by default, index 1,2
+   * corresponds to columns a and b. Index 3,4 correspond to column c which is list and
+   * index 5 correspond to column d. After flattening list gets 2 columns.
+   * Whitespace around column names is ignored, and selected names that do not appear
+   * in allColumns are skipped.
+   *
+   * @param selectedColumns - comma separated list of selected column names
+   * @param allColumns - comma separated list of all column names
+   * @param inspector - object inspector
+   * @return - boolean array with true value set for the specified column names
+   * @throws IllegalArgumentException if allColumns names fewer columns than the struct has
+   */
+  public static boolean[] includeColumns(String selectedColumns, String allColumns,
+      ObjectInspector inspector) {
+    int numFlattenedCols = getFlattenedColumnsCount(inspector);
+    boolean[] results = new boolean[numFlattenedCols];
+    if (selectedColumns != null && !selectedColumns.isEmpty()) {
+      includeColumnsImpl(results, selectedColumns, allColumns, inspector);
+    }
+    return results;
+  }
+
+  /**
+   * Sets to true every flattened column id covered by one of the selected columns.
+   */
+  private static void includeColumnsImpl(boolean[] includeColumns, String selectedColumns,
+      String allColumns,
+      ObjectInspector inspector) {
+    Map<String, List<Integer>> columnSpanMap = getColumnSpan(allColumns, inspector);
+    LOG.info("columnSpanMap: " + columnSpanMap);
+
+    for (String sc : selectedColumns.split(",")) {
+      // tolerate "a, b" style lists; names missing from the span map are skipped
+      List<Integer> colSpan = columnSpanMap.get(sc.trim());
+      if (colSpan != null) {
+        // spans are inclusive on both ends, Arrays.fill excludes its upper bound
+        Arrays.fill(includeColumns, colSpan.get(0), colSpan.get(1) + 1, true);
+      }
+    }
+
+    LOG.info("includeColumns: " + Arrays.toString(includeColumns));
+  }
+
+  /**
+   * Maps each top-level column name to its [start, end] span of flattened column ids.
+   */
+  private static Map<String, List<Integer>> getColumnSpan(String allColumns,
+      ObjectInspector inspector) {
+    // map that contains the column span for each column. Column span is the number of columns
+    // required after flattening. For a given object inspector this map contains the start column
+    // id and end column id (both inclusive) after flattening.
+    // EXAMPLE:
+    // schema: struct<a:int,b:float,c:map<string,int>>
+    // column span map for the above struct will be
+    // a => [1,1], b => [2,2], c => [3,5]
+    Map<String, List<Integer>> columnSpanMap = new HashMap<String, List<Integer>>();
+    if (allColumns == null || !(inspector instanceof StructObjectInspector)) {
+      return columnSpanMap;
+    }
+
+    String[] columns = allColumns.split(",");
+    List<? extends StructField> fields =
+        ((StructObjectInspector) inspector).getAllStructFieldRefs();
+    if (columns.length < fields.size()) {
+      throw new IllegalArgumentException("Expected at least " + fields.size() +
+          " column names but found " + columns.length + " in: " + allColumns);
+    }
+
+    int endIdx = 0;
+    for (int i = 0; i < fields.size(); i++) {
+      // we get the type (category) from object inspector but column name from the argument.
+      // The reason for this is hive (FileSinkOperator) does not pass the actual column names,
+      // instead it passes the internal column names (_col1,_col2).
+      ObjectInspector sfOI = fields.get(i).getFieldObjectInspector();
+
+      // a field takes one id for itself plus one for every nested child, which is exactly
+      // what getFlattenedColumnsCount returns; it also rejects unknown field categories.
+      int startIdx = endIdx + 1;
+      endIdx += getFlattenedColumnsCount(sfOI);
+      columnSpanMap.put(columns[i].trim(), Lists.newArrayList(startIdx, endIdx));
+    }
+    return columnSpanMap;
+  }
+
+  /**
+   * Returns the number of columns after flattening complex types.
+   *
+   * @param inspector - object inspector
+   * @return - number of flattened columns: one for the inspector itself plus the flattened
+   *           columns of all of its children
+   * @throws IllegalArgumentException if the inspector has an unsupported category
+   */
+  public static int getFlattenedColumnsCount(ObjectInspector inspector) {
+    // every inspector contributes one column for itself
+    int numWriters = 1;
+    switch (inspector.getCategory()) {
+    case PRIMITIVE:
+      break;
+    case STRUCT:
+      StructObjectInspector structInsp = (StructObjectInspector) inspector;
+      for (StructField field : structInsp.getAllStructFieldRefs()) {
+        numWriters += getFlattenedColumnsCount(field.getFieldObjectInspector());
+      }
+      break;
+    case MAP:
+      MapObjectInspector mapInsp = (MapObjectInspector) inspector;
+      numWriters += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
+      numWriters += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
+      break;
+    case LIST:
+      ListObjectInspector listInsp = (ListObjectInspector) inspector;
+      numWriters += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
+      break;
+    case UNION:
+      UnionObjectInspector unionInsp = (UnionObjectInspector) inspector;
+      for (ObjectInspector choice : unionInsp.getObjectInspectors()) {
+        numWriters += getFlattenedColumnsCount(choice);
+      }
+      break;
+    default:
+      throw new IllegalArgumentException("Bad category: " +
+          inspector.getCategory());
+    }
+    return numWriters;
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index e3afa60..8592075 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -24,6 +24,7 @@
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
+import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
@@ -51,6 +52,7 @@
import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -101,11 +103,14 @@
List bufferChunks = new ArrayList(0);
private final TreeReader reader;
private final OrcProto.RowIndex[] indexes;
+ private final OrcProto.BloomFilterIndex[] bloomFilterIndices;
private final SearchArgument sarg;
// the leaf predicates for the sarg
private final List sargLeaves;
// an array the same length as the sargLeaves that map them to column ids
private final int[] filterColumns;
+ // boolean array, indexed by column id, that is true for the columns the sarg leaves refer to
+ private final boolean[] sargColumns;
// an array about which row groups aren't skipped
private boolean[] includedRowGroups = null;
private final Configuration conf;
@@ -113,6 +118,24 @@
private final ByteBufferAllocatorPool pool = new ByteBufferAllocatorPool();
private final ZeroCopyReaderShim zcr;
+ /**
+  * Holder for the row group indexes and bloom filter indexes produced by readRowIndex.
+  * Both arrays are filled per column id.
+  */
+ public final static class Index {
+   OrcProto.RowIndex[] rowGroupIndex;
+   OrcProto.BloomFilterIndex[] bloomFilterIndex;
+
+   Index(OrcProto.RowIndex[] rgIndex, OrcProto.BloomFilterIndex[] bfIndex) {
+     rowGroupIndex = rgIndex;
+     bloomFilterIndex = bfIndex;
+   }
+
+   /** @return the row group index array passed to the constructor */
+   public OrcProto.RowIndex[] getRowGroupIndex() {
+     return this.rowGroupIndex;
+   }
+
+   /** @return the bloom filter index array passed to the constructor */
+   public OrcProto.BloomFilterIndex[] getBloomFilterIndex() {
+     return this.bloomFilterIndex;
+   }
+ }
+
// this is an implementation copied from ElasticByteBufferPool in hadoop-2,
// which lacks a clear()/clean() operation
public final static class ByteBufferAllocatorPool implements ByteBufferPoolShim {
@@ -251,9 +274,15 @@ static int findColumns(String[] columnNames,
if (sarg != null) {
sargLeaves = sarg.getLeaves();
filterColumns = mapSargColumns(sargLeaves, options.getColumnNames(), 0);
+ // included will not be null, row options will fill the array with trues if null
+ sargColumns = new boolean[included.length];
+ for (int i : filterColumns) {
+ // filterColumns may contain -1 (the row group evaluation checks for it), which is
+ // not a valid array index, so only mark real column ids
+ if (i >= 0) {
+ sargColumns[i] = true;
+ }
+ }
} else {
sargLeaves = null;
filterColumns = null;
+ sargColumns = null;
}
long rows = 0;
long skippedRows = 0;
@@ -285,6 +314,7 @@ static int findColumns(String[] columnNames,
totalRowCount = rows;
reader = createTreeReader(path, 0, types, included, conf);
indexes = new OrcProto.RowIndex[types.size()];
+ bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
rowIndexStride = strideRate;
advanceToNextRow(0L);
}
@@ -2358,15 +2388,20 @@ static Object getMin(ColumnStatistics index) {
* that is referenced in the predicate.
* @param statsProto the statistics for the column mentioned in the predicate
* @param predicate the leaf predicate we need to evaluation
+ * @param bloomFilter the serialized bloom filter to consult, or null to use only the statistics
* @return the set of truth values that may be returned for the given
* predicate.
*/
static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
- PredicateLeaf predicate) {
+ PredicateLeaf predicate, OrcProto.BloomFilter bloomFilter) {
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
Object minValue = getMin(cs);
Object maxValue = getMax(cs);
- return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull());
+ BloomFilter bf = null;
+ if (bloomFilter != null) {
+ bf = new BloomFilter(bloomFilter);
+ }
+ return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(), bf);
}
/**
@@ -2378,14 +2413,14 @@ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
* predicate.
*/
static TruthValue evaluatePredicate(ColumnStatistics stats,
- PredicateLeaf predicate) {
+ PredicateLeaf predicate, BloomFilter bloomFilter) {
Object minValue = getMin(stats);
Object maxValue = getMax(stats);
- return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull());
+ return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter);
}
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
- Object max, boolean hasNull) {
+ Object max, boolean hasNull, BloomFilter bloomFilter) {
// if we didn't have any values, everything must have been null
if (min == null) {
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2411,20 +2446,30 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
switch (predicate.getOperator()) {
case NULL_SAFE_EQUALS:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (loc == Location.BEFORE || loc == Location.AFTER) {
- return TruthValue.NO;
+ // if bloom filter exists, check in bloom filter else min/max stats
+ if (bloomFilter == null) {
+ loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ if (loc == Location.BEFORE || loc == Location.AFTER) {
+ return TruthValue.NO;
+ } else {
+ return TruthValue.YES_NO;
+ }
} else {
- return TruthValue.YES_NO;
+ return checkInBloomFilter(bloomFilter, predObj);
}
case EQUALS:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (minValue.equals(maxValue) && loc == Location.MIN) {
- return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
- } else if (loc == Location.BEFORE || loc == Location.AFTER) {
- return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+ // if bloom filter exists, check in bloom filter else min/max stats
+ if (bloomFilter == null) {
+ loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ if (minValue.equals(maxValue) && loc == Location.MIN) {
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+ } else if (loc == Location.BEFORE || loc == Location.AFTER) {
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+ } else {
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ }
} else {
- return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ return checkInBloomFilter(bloomFilter, predObj);
}
case LESS_THAN:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
@@ -2450,9 +2495,16 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
// set
for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
predObj = getBaseObjectForComparison(arg, minValue);
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (loc == Location.MIN) {
- return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+ if (bloomFilter == null) {
+ loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ if (loc == Location.MIN) {
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+ }
+ } else {
+ // if at least one value in the IN list may exist in the bloom filter, qualify the row group/stripe
+ if (checkInBloomFilter(bloomFilter, predObj) != TruthValue.NO_NULL) {
+ return TruthValue.YES_NO_NULL;
+ }
}
}
return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
@@ -2460,10 +2512,17 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
// are all of the values outside of the range?
for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
predObj = getBaseObjectForComparison(arg, minValue);
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (loc == Location.MIN || loc == Location.MIDDLE ||
- loc == Location.MAX) {
- return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ if (bloomFilter == null) {
+ loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ if (loc == Location.MIN || loc == Location.MIDDLE ||
+ loc == Location.MAX) {
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ }
+ } else {
+ // if at least one value in the IN list may exist in the bloom filter, qualify the row group/stripe
+ if (checkInBloomFilter(bloomFilter, predObj) != TruthValue.NO_NULL) {
+ return TruthValue.YES_NO_NULL;
+ }
}
}
return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
@@ -2502,6 +2561,49 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
}
}
+ /**
+  * Tests a predicate literal against a bloom filter.
+  *
+  * @param bf the bloom filter to probe
+  * @param predObj the predicate literal
+  * @return YES_NO_NULL if the filter reports a possible match or the literal's type is not
+  *         one of the types handled here, NO_NULL if the filter rules the literal out
+  */
+ private static TruthValue checkInBloomFilter(BloomFilter bf, Object predObj) {
+   final boolean mayContain;
+   if (predObj instanceof Long) {
+     mayContain = bf.testLong(((Long) predObj).longValue());
+   } else if (predObj instanceof Double) {
+     mayContain = bf.testDouble(((Double) predObj).doubleValue());
+   } else if (predObj instanceof String || predObj instanceof Text ||
+       predObj instanceof HiveDecimal || predObj instanceof BigDecimal) {
+     // string-like and decimal literals are probed through their string form
+     mayContain = bf.testString(predObj.toString());
+   } else if (predObj instanceof Date) {
+     mayContain = bf.testLong(DateWritable.dateToDays((Date) predObj));
+   } else if (predObj instanceof DateWritable) {
+     mayContain = bf.testLong(((DateWritable) predObj).getDays());
+   } else if (predObj instanceof Timestamp) {
+     mayContain = bf.testLong(((Timestamp) predObj).getTime());
+   } else if (predObj instanceof TimestampWritable) {
+     mayContain = bf.testLong(((TimestampWritable) predObj).getTimestamp().getTime());
+   } else {
+     // literal type not handled above: the value cannot be ruled out
+     mayContain = true;
+   }
+
+   TruthValue result = mayContain ? TruthValue.YES_NO_NULL : TruthValue.NO_NULL;
+
+   if (LOG.isDebugEnabled()) {
+     LOG.debug("Bloom filter evaluation: " + result.toString());
+   }
+
+   return result;
+ }
+
private static Object getBaseObjectForComparison(Object predObj, Object statsObj) {
if (predObj != null) {
if (predObj instanceof ExprNodeConstantDesc) {
@@ -2567,7 +2669,7 @@ private static Object getConvertedStatsObj(Object statsObj, Object predObj) {
if (sarg == null || rowIndexStride == 0) {
return null;
}
- readRowIndex(currentStripe);
+ readRowIndex(currentStripe, sargColumns);
long rowsInStripe = stripes.get(currentStripe).getNumberOfRows();
int groupsInStripe = (int) ((rowsInStripe + rowIndexStride - 1) /
rowIndexStride);
@@ -2578,7 +2680,11 @@ private static Object getConvertedStatsObj(Object statsObj, Object predObj) {
if (filterColumns[pred] != -1) {
OrcProto.ColumnStatistics stats =
indexes[filterColumns[pred]].getEntry(rowGroup).getStatistics();
- leafValues[pred] = evaluatePredicate(stats, sargLeaves.get(pred));
+ OrcProto.BloomFilter bf = null;
+ if (bloomFilterIndices[filterColumns[pred]] != null) {
+ bf = bloomFilterIndices[filterColumns[pred]].getBloomFilter(rowGroup);
+ }
+ leafValues[pred] = evaluatePredicate(stats, sargLeaves.get(pred), bf);
if (LOG.isDebugEnabled()) {
LOG.debug("Stats = " + stats);
LOG.debug("Setting " + sargLeaves.get(pred) + " to " +
@@ -3221,7 +3327,7 @@ private int findStripe(long rowNumber) {
throw new IllegalArgumentException("Seek after the end of reader range");
}
- OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException {
+ Index readRowIndex(int stripeIndex, boolean[] sargColumns) throws IOException {
long offset = stripes.get(stripeIndex).getOffset();
OrcProto.StripeFooter stripeFooter;
OrcProto.RowIndex[] indexes;
@@ -3233,21 +3339,45 @@ private int findStripe(long rowNumber) {
stripeFooter = readStripeFooter(stripes.get(stripeIndex));
indexes = new OrcProto.RowIndex[this.indexes.length];
}
- for(OrcProto.Stream stream: stripeFooter.getStreamsList()) {
+ List streams = stripeFooter.getStreamsList();
+ for (int i = 0; i < streams.size(); i++) {
+ OrcProto.Stream stream = streams.get(i);
+ OrcProto.Stream nextStream = null;
+ if (i < streams.size() - 1) {
+ nextStream = streams.get(i+1);
+ }
+ int col = stream.getColumn();
+ int len = (int) stream.getLength();
+ // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
+ // filter and combine the io to read row index and bloom filters for that column together
if (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX) {
- int col = stream.getColumn();
+ boolean readBloomFilter = false;
+ // nextStream stays null when this row index is the last stream in the list, so it
+ // must be checked before its kind is inspected
+ if (sargColumns != null && sargColumns[col] && nextStream != null &&
+ nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
+ len += nextStream.getLength();
+ i += 1;
+ readBloomFilter = true;
+ }
if ((included == null || included[col]) && indexes[col] == null) {
- byte[] buffer = new byte[(int) stream.getLength()];
+ byte[] buffer = new byte[len];
file.seek(offset);
file.readFully(buffer);
+ ByteBuffer[] bb = new ByteBuffer[] {ByteBuffer.wrap(buffer)};
indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
- new ByteBuffer[] {ByteBuffer.wrap(buffer)}, new long[]{0},
- stream.getLength(), codec, bufferSize));
+ bb, new long[]{0}, stream.getLength(), codec, bufferSize));
+ if (readBloomFilter) {
+ bb[0].position((int) stream.getLength());
+ bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(
+ InStream.create("bloom_filter", bb, new long[]{0}, nextStream.getLength(),
+ codec, bufferSize));
+ }
}
}
- offset += stream.getLength();
+ offset += len;
}
- return indexes;
+
+ Index index = new Index(indexes, bloomFilterIndices);
+ return index;
}
private void seekToRowEntry(int rowEntry) throws IOException {
@@ -3279,7 +3409,7 @@ public void seekToRow(long rowNumber) throws IOException {
currentStripe = rightStripe;
readStripe();
}
- readRowIndex(currentStripe);
+ readRowIndex(currentStripe, sargColumns);
// if we aren't to the right row yet, advanance in the stripe.
advanceToNextRow(rowNumber);
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
index 3d44954..6215791 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
@@ -75,6 +75,7 @@ public static Area getArea(OrcProto.Stream.Kind kind) {
switch (kind) {
case ROW_INDEX:
case DICTIONARY_COUNT:
+ case BLOOM_FILTER:
return Area.INDEX;
default:
return Area.DATA;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 159d3ab..25f6bac 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -20,10 +20,16 @@
import static com.google.common.base.Preconditions.checkArgument;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Lists;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.CodedOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.management.ManagementFactory;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -34,6 +40,7 @@
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
import org.apache.hadoop.hive.ql.io.orc.CompressionCodec.Modifier;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy;
@@ -70,16 +77,12 @@
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.lang.management.ManagementFactory;
-import java.nio.ByteBuffer;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.EnumSet;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Longs;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedOutputStream;
/**
* An ORC file writer. The file is divided into stripes, which is the natural
@@ -145,23 +148,27 @@
private final OrcFile.WriterContext callbackContext;
private final OrcFile.EncodingStrategy encodingStrategy;
private final OrcFile.CompressionStrategy compressionStrategy;
+ private final boolean[] bloomFilterColumns;
+ private final double bloomFilterFpp;
WriterImpl(FileSystem fs,
- Path path,
- Configuration conf,
- ObjectInspector inspector,
- long stripeSize,
- CompressionKind compress,
- int bufferSize,
- int rowIndexStride,
- MemoryManager memoryManager,
- boolean addBlockPadding,
- OrcFile.Version version,
- OrcFile.WriterCallback callback,
- OrcFile.EncodingStrategy encodingStrategy,
- CompressionStrategy compressionStrategy,
- float paddingTolerance,
- long blockSizeValue) throws IOException {
+ Path path,
+ Configuration conf,
+ ObjectInspector inspector,
+ long stripeSize,
+ CompressionKind compress,
+ int bufferSize,
+ int rowIndexStride,
+ MemoryManager memoryManager,
+ boolean addBlockPadding,
+ OrcFile.Version version,
+ OrcFile.WriterCallback callback,
+ EncodingStrategy encodingStrategy,
+ CompressionStrategy compressionStrategy,
+ float paddingTolerance,
+ long blockSizeValue,
+ String bloomFilterColumnNames,
+ double bloomFilterFpp) throws IOException {
this.fs = fs;
this.path = path;
this.conf = conf;
@@ -190,7 +197,13 @@ public Writer getWriter() {
this.memoryManager = memoryManager;
buildIndex = rowIndexStride > 0;
codec = createCodec(compress);
- this.bufferSize = getEstimatedBufferSize(bufferSize);
+ String allColumns = conf.get(IOConstants.COLUMNS);
+ if (allColumns == null) {
+ allColumns = getColumnNamesFromInspector(inspector);
+ }
+ this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize);
+ this.bloomFilterColumns = OrcUtils.includeColumns(bloomFilterColumnNames, allColumns, inspector);
+ this.bloomFilterFpp = bloomFilterFpp;
treeWriter = createTreeWriter(inspector, streamFactory, false);
if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
throw new IllegalArgumentException("Row stride must be at least " +
@@ -201,8 +214,25 @@ public Writer getWriter() {
memoryManager.addWriter(path, stripeSize, this);
}
+ private String getColumnNamesFromInspector(ObjectInspector inspector) {
+ List<String> fieldNames = Lists.newArrayList();
+ Joiner joiner = Joiner.on(",");
+ if (inspector instanceof StructObjectInspector) {
+ StructObjectInspector soi = (StructObjectInspector) inspector;
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ for(StructField sf : fields) {
+ fieldNames.add(sf.getFieldName());
+ }
+ }
+ return joiner.join(fieldNames);
+ }
+
+ @VisibleForTesting
int getEstimatedBufferSize(int bs) {
- String colNames = conf.get(IOConstants.COLUMNS);
+ return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs);
+ }
+
+ int getEstimatedBufferSize(String colNames, int bs) {
long availableMem = getMemoryAvailableForORC();
if (colNames != null) {
final int numCols = colNames.split(",").length;
@@ -459,26 +489,27 @@ public OutStream createStream(int column,
final EnumSet modifiers;
switch (kind) {
- case DATA:
- case DICTIONARY_DATA:
- if (getCompressionStrategy() == CompressionStrategy.SPEED) {
- modifiers = EnumSet.of(Modifier.FAST, Modifier.TEXT);
- } else {
- modifiers = EnumSet.of(Modifier.DEFAULT, Modifier.TEXT);
- }
- break;
- case LENGTH:
- case DICTIONARY_COUNT:
- case PRESENT:
- case ROW_INDEX:
- case SECONDARY:
- // easily compressed using the fastest modes
- modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY);
- break;
- default:
- LOG.warn("Missing ORC compression modifiers for " + kind);
- modifiers = null;
- break;
+ case BLOOM_FILTER:
+ case DATA:
+ case DICTIONARY_DATA:
+ if (getCompressionStrategy() == CompressionStrategy.SPEED) {
+ modifiers = EnumSet.of(Modifier.FAST, Modifier.TEXT);
+ } else {
+ modifiers = EnumSet.of(Modifier.DEFAULT, Modifier.TEXT);
+ }
+ break;
+ case LENGTH:
+ case DICTIONARY_COUNT:
+ case PRESENT:
+ case ROW_INDEX:
+ case SECONDARY:
+ // easily compressed using the fastest modes
+ modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY);
+ break;
+ default:
+ LOG.warn("Missing ORC compression modifiers for " + kind);
+ modifiers = null;
+ break;
}
BufferedStream result = streams.get(name);
@@ -499,6 +530,15 @@ public int getNextColumnId() {
}
/**
+ * Get the current column id. After creating all tree writers this count should tell how many
+ * columns (including columns within nested complex objects) are created in total.
+ * @return current column id
+ */
+ public int getCurrentColumnId() {
+ return columnCount;
+ }
+
+ /**
* Get the stride rate of the row index.
*/
public int getRowIndexStride() {
@@ -538,6 +578,22 @@ public CompressionStrategy getCompressionStrategy() {
}
/**
+ * Get the bloom filter flags, indexed by column id (true means a bloom filter is created).
+ * @return bloom filter columns
+ */
+ public boolean[] getBloomFilterColumns() {
+ return bloomFilterColumns;
+ }
+
+ /**
+ * Get bloom filter false positive probability.
+ * @return fpp
+ */
+ public double getBloomFilterFPP() {
+ return bloomFilterFpp;
+ }
+
+ /**
* Get the writer's configuration.
* @return configuration
*/
@@ -572,6 +628,11 @@ public Configuration getConfiguration() {
private final OrcProto.RowIndex.Builder rowIndex;
private final OrcProto.RowIndexEntry.Builder rowIndexEntry;
private final PositionedOutputStream rowIndexStream;
+ private final PositionedOutputStream bloomFilterStream;
+ protected final BloomFilter bloomFilter;
+ protected final boolean createBloomFilter;
+ private final OrcProto.BloomFilterIndex.Builder bloomFilterIndex;
+ private final OrcProto.BloomFilter.Builder bloomFilterEntry;
private boolean foundNulls;
private OutStream isPresentOutStream;
private final List stripeStatsBuilders;
@@ -598,6 +659,7 @@ public Configuration getConfiguration() {
isPresent = null;
}
this.foundNulls = false;
+ createBloomFilter = streamFactory.getBloomFilterColumns()[columnId];
indexStatistics = ColumnStatisticsImpl.create(inspector);
stripeColStatistics = ColumnStatisticsImpl.create(inspector);
fileStatistics = ColumnStatisticsImpl.create(inspector);
@@ -607,11 +669,22 @@ public Configuration getConfiguration() {
rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
stripeStatsBuilders = Lists.newArrayList();
if (streamFactory.buildIndex()) {
- rowIndexStream = streamFactory.createStream(id,
- OrcProto.Stream.Kind.ROW_INDEX);
+ rowIndexStream = streamFactory.createStream(id, OrcProto.Stream.Kind.ROW_INDEX);
} else {
rowIndexStream = null;
}
+ if (createBloomFilter) {
+ bloomFilterEntry = OrcProto.BloomFilter.newBuilder();
+ bloomFilterIndex = OrcProto.BloomFilterIndex.newBuilder();
+ bloomFilterStream = streamFactory.createStream(id, OrcProto.Stream.Kind.BLOOM_FILTER);
+ bloomFilter = new BloomFilter(streamFactory.getRowIndexStride(),
+ streamFactory.getBloomFilterFPP());
+ } else {
+ bloomFilterEntry = null;
+ bloomFilterIndex = null;
+ bloomFilterStream = null;
+ bloomFilter = null;
+ }
}
protected OrcProto.RowIndex.Builder getRowIndex() {
@@ -725,6 +798,14 @@ void writeStripe(OrcProto.StripeFooter.Builder builder,
}
rowIndex.clear();
rowIndexEntry.clear();
+
+ // write the bloom filter to out stream
+ if (bloomFilterStream != null) {
+ bloomFilterIndex.build().writeTo(bloomFilterStream);
+ bloomFilterStream.flush();
+ bloomFilterIndex.clear();
+ bloomFilterEntry.clear();
+ }
}
private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder,
@@ -763,12 +844,24 @@ void createRowIndexEntry() throws IOException {
indexStatistics.reset();
rowIndex.addEntry(rowIndexEntry);
rowIndexEntry.clear();
+ addBloomFilterEntry();
recordPosition(rowIndexPosition);
for(TreeWriter child: childrenWriters) {
child.createRowIndexEntry();
}
}
+ void addBloomFilterEntry() {
+ if (createBloomFilter) {
+ bloomFilterEntry.setFpp(bloomFilter.getFalsePositiveProbability());
+ bloomFilterEntry.setNumEntries(bloomFilter.getExpectedNumEntries());
+ bloomFilterEntry.addAllBitset(Longs.asList(bloomFilter.getBitSet()));
+ bloomFilterIndex.addBloomFilter(bloomFilterEntry.build());
+ bloomFilter.reset();
+ bloomFilterEntry.clear();
+ }
+ }
+
/**
* Record the current position in each of this column's streams.
* @param recorder where should the locations be recorded
@@ -851,6 +944,9 @@ void write(Object obj) throws IOException {
if (obj != null) {
byte val = ((ByteObjectInspector) inspector).get(obj);
indexStatistics.updateInteger(val);
+ if (createBloomFilter) {
+ bloomFilter.addLong(val);
+ }
writer.write(val);
}
}
@@ -926,6 +1022,10 @@ void write(Object obj) throws IOException {
val = shortInspector.get(obj);
}
indexStatistics.updateInteger(val);
+ if (createBloomFilter) {
+ // integers are converted to longs in column statistics and during SARG evaluation
+ bloomFilter.addLong(val);
+ }
writer.write(val);
}
}
@@ -966,6 +1066,10 @@ void write(Object obj) throws IOException {
if (obj != null) {
float val = ((FloatObjectInspector) inspector).get(obj);
indexStatistics.updateDouble(val);
+ if (createBloomFilter) {
+ // floats are converted to doubles in column statistics and during SARG evaluation
+ bloomFilter.addDouble(val);
+ }
utils.writeFloat(stream, val);
}
}
@@ -1006,6 +1110,9 @@ void write(Object obj) throws IOException {
if (obj != null) {
double val = ((DoubleObjectInspector) inspector).get(obj);
indexStatistics.updateDouble(val);
+ if (createBloomFilter) {
+ bloomFilter.addDouble(val);
+ }
utils.writeDouble(stream, val);
}
}
@@ -1099,6 +1206,9 @@ void write(Object obj) throws IOException {
directLengthOutput.write(val.getLength());
}
indexStatistics.updateString(val);
+ if (createBloomFilter) {
+ bloomFilter.addBytes(val.getBytes(), val.getLength());
+ }
}
}
@@ -1250,6 +1360,7 @@ void createRowIndexEntry() throws IOException {
OrcProto.RowIndexEntry base = rowIndexEntry.build();
savedRowIndex.add(base);
rowIndexEntry.clear();
+ addBloomFilterEntry();
recordPosition(rowIndexPosition);
rowIndexValueCount.add(Long.valueOf(rows.size()));
if (strideDictionaryCheck) {
@@ -1360,6 +1471,9 @@ void write(Object obj) throws IOException {
stream.write(val.getBytes(), 0, val.getLength());
length.write(val.getLength());
indexStatistics.updateBinary(val);
+ if (createBloomFilter) {
+ bloomFilter.addBytes(val.getBytes(), val.getLength());
+ }
}
}
@@ -1422,6 +1536,9 @@ void write(Object obj) throws IOException {
indexStatistics.updateTimestamp(val);
seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP);
nanos.write(formatNanos(val.getNanos()));
+ if (createBloomFilter) {
+ bloomFilter.addLong(val.getTime());
+ }
}
}
@@ -1482,6 +1599,9 @@ void write(Object obj) throws IOException {
DateWritable val = ((DateObjectInspector) inspector).getPrimitiveWritableObject(obj);
indexStatistics.updateDate(val);
writer.write(val.getDays());
+ if (createBloomFilter) {
+ bloomFilter.addLong(val.getDays());
+ }
}
}
@@ -1550,6 +1670,9 @@ void write(Object obj) throws IOException {
decimal.unscaledValue());
scaleStream.write(decimal.scale());
indexStatistics.updateDecimal(decimal);
+ if (createBloomFilter) {
+ bloomFilter.addString(decimal.toString());
+ }
}
}
@@ -1649,6 +1772,9 @@ void write(Object obj) throws IOException {
ListObjectInspector insp = (ListObjectInspector) inspector;
int len = insp.getListLength(obj);
lengths.write(len);
+ if (createBloomFilter) {
+ bloomFilter.addLong(len);
+ }
for(int i=0; i < len; ++i) {
childrenWriters[0].write(insp.getListElement(obj, i));
}
@@ -1713,6 +1839,9 @@ void write(Object obj) throws IOException {
// accessor in the MapObjectInspector.
Map<?, ?> valueMap = insp.getMap(obj);
lengths.write(valueMap.size());
+ if (createBloomFilter) {
+ bloomFilter.addLong(valueMap.size());
+ }
for(Map.Entry<?, ?> entry: valueMap.entrySet()) {
childrenWriters[0].write(entry.getKey());
childrenWriters[1].write(entry.getValue());
@@ -1765,6 +1894,9 @@ void write(Object obj) throws IOException {
UnionObjectInspector insp = (UnionObjectInspector) inspector;
byte tag = insp.getTag(obj);
tags.write(tag);
+ if (createBloomFilter) {
+ bloomFilter.addLong(tag);
+ }
childrenWriters[tag].write(insp.getField(obj));
}
}
diff --git ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
index 98459fb..dc16c12 100644
--- ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
+++ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
@@ -86,6 +86,16 @@ message RowIndex {
repeated RowIndexEntry entry = 1;
}
+message BloomFilter {
+ optional uint64 numEntries = 1;
+ optional double fpp = 2;
+ repeated uint64 bitset = 3;
+}
+
+message BloomFilterIndex {
+ repeated BloomFilter bloomFilter = 1;
+}
+
message Stream {
// if you add new index stream kinds, you need to make sure to update
// StreamName to ensure it is added to the stripe in the right area
@@ -97,6 +107,7 @@ message Stream {
DICTIONARY_COUNT = 4;
SECONDARY = 5;
ROW_INDEX = 6;
+ BLOOM_FILTER = 7;
}
required Kind kind = 1;
optional uint32 column = 2;
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestBloomFilter.java ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestBloomFilter.java
new file mode 100644
index 0000000..7d1a1f3
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestBloomFilter.java
@@ -0,0 +1,459 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.filters;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Random;
+
+import org.junit.Test;
+
+/**
+ * Tests for {@link BloomFilter}.
+ */
+public class TestBloomFilter {
+ private static final int COUNT = 100;
+ Random rand = new Random(123);
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg1() {
+ BloomFilter bf = new BloomFilter(0, 0);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg2() {
+ BloomFilter bf = new BloomFilter(0, 0.1);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg3() {
+ BloomFilter bf = new BloomFilter(1, 0.0);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg4() {
+ BloomFilter bf = new BloomFilter(1, 1.0);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBloomIllegalArg5() {
+ BloomFilter bf = new BloomFilter(-1, -1);
+ }
+
+
+ @Test
+ public void testBloomNumBits() {
+ assertEquals(0, BloomFilter.optimalNumOfBits(0, 0));
+ assertEquals(1549, BloomFilter.optimalNumOfBits(1, 0));
+ assertEquals(0, BloomFilter.optimalNumOfBits(0, 1));
+ assertEquals(0, BloomFilter.optimalNumOfBits(1, 1));
+ assertEquals(7, BloomFilter.optimalNumOfBits(1, 0.03));
+ assertEquals(72, BloomFilter.optimalNumOfBits(10, 0.03));
+ assertEquals(729, BloomFilter.optimalNumOfBits(100, 0.03));
+ assertEquals(7298, BloomFilter.optimalNumOfBits(1000, 0.03));
+ assertEquals(72984, BloomFilter.optimalNumOfBits(10000, 0.03));
+ assertEquals(729844, BloomFilter.optimalNumOfBits(100000, 0.03));
+ assertEquals(7298440, BloomFilter.optimalNumOfBits(1000000, 0.03));
+ assertEquals(6235224, BloomFilter.optimalNumOfBits(1000000, 0.05));
+ }
+
+ @Test
+ public void testBloomNumHashFunctions() {
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(-1, -1));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(0, 0));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 0));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 10));
+ assertEquals(7, BloomFilter.optimalNumOfHashFunctions(10, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10000, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100000, 100));
+ assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000000, 100));
+ }
+
+ @Test
+ public void testBloomFilterBytes() {
+ BloomFilter bf = new BloomFilter(10000);
+ byte[] val = new byte[]{1, 2, 3};
+ byte[] val1 = new byte[]{1, 2, 3, 4};
+ byte[] val2 = new byte[]{1, 2, 3, 4, 5};
+ byte[] val3 = new byte[]{1, 2, 3, 4, 5, 6};
+
+ assertEquals(false, bf.test(val));
+ assertEquals(false, bf.test(val1));
+ assertEquals(false, bf.test(val2));
+ assertEquals(false, bf.test(val3));
+ bf.add(val);
+ assertEquals(true, bf.test(val));
+ assertEquals(false, bf.test(val1));
+ assertEquals(false, bf.test(val2));
+ assertEquals(false, bf.test(val3));
+ bf.add(val1);
+ assertEquals(true, bf.test(val));
+ assertEquals(true, bf.test(val1));
+ assertEquals(false, bf.test(val2));
+ assertEquals(false, bf.test(val3));
+ bf.add(val2);
+ assertEquals(true, bf.test(val));
+ assertEquals(true, bf.test(val1));
+ assertEquals(true, bf.test(val2));
+ assertEquals(false, bf.test(val3));
+ bf.add(val3);
+ assertEquals(true, bf.test(val));
+ assertEquals(true, bf.test(val1));
+ assertEquals(true, bf.test(val2));
+ assertEquals(true, bf.test(val3));
+
+ byte[] randVal = new byte[COUNT];
+ for (int i = 0; i < COUNT; i++) {
+ rand.nextBytes(randVal);
+ bf.add(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.test(randVal));
+ // most likely this value should not exist
+ randVal[0] = 0;
+ randVal[1] = 0;
+ randVal[2] = 0;
+ randVal[3] = 0;
+ randVal[4] = 0;
+ assertEquals(false, bf.test(randVal));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterByte() {
+ BloomFilter bf = new BloomFilter(10000);
+ byte val = Byte.MIN_VALUE;
+ byte val1 = 1;
+ byte val2 = 2;
+ byte val3 = Byte.MAX_VALUE;
+
+ assertEquals(false, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val1);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val2);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val3);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(true, bf.testLong(val3));
+
+ byte randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = (byte) rand.nextInt(Byte.MAX_VALUE);
+ bf.addLong(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testLong(randVal));
+ // most likely this value should not exist
+ assertEquals(false, bf.testLong((byte) -120));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterInt() {
+ BloomFilter bf = new BloomFilter(10000);
+ int val = Integer.MIN_VALUE;
+ int val1 = 1;
+ int val2 = 2;
+ int val3 = Integer.MAX_VALUE;
+
+ assertEquals(false, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val1);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val2);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val3);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(true, bf.testLong(val3));
+
+ int randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextInt();
+ bf.addLong(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testLong(randVal));
+ // most likely this value should not exist
+ assertEquals(false, bf.testLong(-120));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterLong() {
+ BloomFilter bf = new BloomFilter(10000);
+ long val = Long.MIN_VALUE;
+ long val1 = 1;
+ long val2 = 2;
+ long val3 = Long.MAX_VALUE;
+
+ assertEquals(false, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(false, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val1);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(false, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val2);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(false, bf.testLong(val3));
+ bf.addLong(val3);
+ assertEquals(true, bf.testLong(val));
+ assertEquals(true, bf.testLong(val1));
+ assertEquals(true, bf.testLong(val2));
+ assertEquals(true, bf.testLong(val3));
+
+ long randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextLong();
+ bf.addLong(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testLong(randVal));
+ // most likely this value should not exist
+ assertEquals(false, bf.testLong(-120));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterFloat() {
+ BloomFilter bf = new BloomFilter(10000);
+ float val = Float.MIN_VALUE;
+ float val1 = 1.1f;
+ float val2 = 2.2f;
+ float val3 = Float.MAX_VALUE;
+
+ assertEquals(false, bf.testDouble(val));
+ assertEquals(false, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(false, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val1);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val2);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(true, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val3);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(true, bf.testDouble(val2));
+ assertEquals(true, bf.testDouble(val3));
+
+ float randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextFloat();
+ bf.addDouble(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testDouble(randVal));
+ // most likely this value should not exist
+ assertEquals(false, bf.testDouble(-120.2f));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterDouble() {
+ BloomFilter bf = new BloomFilter(10000);
+ double val = Double.MIN_VALUE;
+ double val1 = 1.1d;
+ double val2 = 2.2d;
+ double val3 = Double.MAX_VALUE;
+
+ assertEquals(false, bf.testDouble(val));
+ assertEquals(false, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(false, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val1);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(false, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val2);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(true, bf.testDouble(val2));
+ assertEquals(false, bf.testDouble(val3));
+ bf.addDouble(val3);
+ assertEquals(true, bf.testDouble(val));
+ assertEquals(true, bf.testDouble(val1));
+ assertEquals(true, bf.testDouble(val2));
+ assertEquals(true, bf.testDouble(val3));
+
+ double randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextDouble();
+ bf.addDouble(randVal);
+ }
+ // last value should be present
+ assertEquals(true, bf.testDouble(randVal));
+ // most likely this value should not exist
+ assertEquals(false, bf.testDouble(-120.2d));
+
+ assertEquals(7800, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testBloomFilterString() {
+ BloomFilter bf = new BloomFilter(100000);
+ String val = "bloo";
+ String val1 = "bloom fil";
+ String val2 = "bloom filter";
+ String val3 = "cuckoo filter";
+
+ assertEquals(false, bf.testString(val));
+ assertEquals(false, bf.testString(val1));
+ assertEquals(false, bf.testString(val2));
+ assertEquals(false, bf.testString(val3));
+ bf.addString(val);
+ assertEquals(true, bf.testString(val));
+ assertEquals(false, bf.testString(val1));
+ assertEquals(false, bf.testString(val2));
+ assertEquals(false, bf.testString(val3));
+ bf.addString(val1);
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(false, bf.testString(val2));
+ assertEquals(false, bf.testString(val3));
+ bf.addString(val2);
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(true, bf.testString(val2));
+ assertEquals(false, bf.testString(val3));
+ bf.addString(val3);
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(true, bf.testString(val2));
+ assertEquals(true, bf.testString(val3));
+
+ long randVal = 0;
+ for (int i = 0; i < COUNT; i++) {
+ randVal = rand.nextLong();
+ bf.addString(Long.toString(randVal));
+ }
+ // last value should be present
+ assertEquals(true, bf.testString(Long.toString(randVal)));
+ // most likely this value should not exist
+ assertEquals(false, bf.testString(Long.toString(-120)));
+
+ assertEquals(77944, bf.sizeInBytes());
+ }
+
+ @Test
+ public void testMerge() {
+ BloomFilter bf = new BloomFilter(10000);
+ String val = "bloo";
+ String val1 = "bloom fil";
+ String val2 = "bloom filter";
+ String val3 = "cuckoo filter";
+ bf.addString(val);
+ bf.addString(val1);
+ bf.addString(val2);
+ bf.addString(val3);
+
+ BloomFilter bf2 = new BloomFilter(10000);
+ String v = "2_bloo";
+ String v1 = "2_bloom fil";
+ String v2 = "2_bloom filter";
+ String v3 = "2_cuckoo filter";
+ bf2.addString(v);
+ bf2.addString(v1);
+ bf2.addString(v2);
+ bf2.addString(v3);
+
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(true, bf.testString(val2));
+ assertEquals(true, bf.testString(val3));
+ assertEquals(false, bf.testString(v));
+ assertEquals(false, bf.testString(v1));
+ assertEquals(false, bf.testString(v2));
+ assertEquals(false, bf.testString(v3));
+
+ bf.merge(bf2);
+
+ assertEquals(true, bf.testString(val));
+ assertEquals(true, bf.testString(val1));
+ assertEquals(true, bf.testString(val2));
+ assertEquals(true, bf.testString(val3));
+ assertEquals(true, bf.testString(v));
+ assertEquals(true, bf.testString(v1));
+ assertEquals(true, bf.testString(v2));
+ assertEquals(true, bf.testString(v3));
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestMurmur3.java ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestMurmur3.java
new file mode 100644
index 0000000..d92a3ce
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/filters/TestMurmur3.java
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.filters;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Random;
+
+/**
+ * Tests for Murmur3 variants.
+ */
+public class TestMurmur3 {
+
+ @Test
+ public void testHashCodesM3_32_string() {
+ String key = "test";
+ int seed = 123;
+ HashFunction hf = Hashing.murmur3_32(seed);
+ int hc1 = hf.hashBytes(key.getBytes()).asInt();
+ int hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed);
+ assertEquals(hc1, hc2);
+
+ key = "testkey";
+ hc1 = hf.hashBytes(key.getBytes()).asInt();
+ hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed);
+ assertEquals(hc1, hc2);
+ }
+
+ @Test
+ public void testHashCodesM3_32_ints() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_32(seed);
+ for (int i = 0; i < 1000; i++) {
+ int val = rand.nextInt();
+ byte[] data = ByteBuffer.allocate(4).putInt(val).array();
+ int hc1 = hf.hashBytes(data).asInt();
+ int hc2 = Murmur3.hash32(data, data.length, seed);
+ assertEquals(hc1, hc2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_32_longs() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_32(seed);
+ for (int i = 0; i < 1000; i++) {
+ long val = rand.nextLong();
+ byte[] data = ByteBuffer.allocate(8).putLong(val).array();
+ int hc1 = hf.hashBytes(data).asInt();
+ int hc2 = Murmur3.hash32(data, data.length, seed);
+ assertEquals(hc1, hc2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_32_double() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_32(seed);
+ for (int i = 0; i < 1000; i++) {
+ double val = rand.nextDouble();
+ byte[] data = ByteBuffer.allocate(8).putDouble(val).array();
+ int hc1 = hf.hashBytes(data).asInt();
+ int hc2 = Murmur3.hash32(data, data.length, seed);
+ assertEquals(hc1, hc2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_128_string() {
+ String key = "test";
+ int seed = 123;
+ HashFunction hf = Hashing.murmur3_128(seed);
+ // guava stores the hashcodes in little endian order
+ ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(key.getBytes()).asBytes());
+ buf.flip();
+ long gl1 = buf.getLong();
+ long gl2 = buf.getLong(8);
+ long[] hc = Murmur3.hash128(key.getBytes(), key.getBytes().length, seed);
+ long m1 = hc[0];
+ long m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+
+ key = "testkey128_testkey128";
+ buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(key.getBytes()).asBytes());
+ buf.flip();
+ gl1 = buf.getLong();
+ gl2 = buf.getLong(8);
+ hc = Murmur3.hash128(key.getBytes(), key.getBytes().length, seed);
+ m1 = hc[0];
+ m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+ }
+
+ @Test
+ public void testHashCodesM3_128_ints() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_128(seed);
+ for (int i = 0; i < 1000; i++) {
+ int val = rand.nextInt();
+ byte[] data = ByteBuffer.allocate(4).putInt(val).array();
+ // guava stores the hashcodes in little endian order
+ ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(data).asBytes());
+ buf.flip();
+ long gl1 = buf.getLong();
+ long gl2 = buf.getLong(8);
+ long[] hc = Murmur3.hash128(data, data.length, seed);
+ long m1 = hc[0];
+ long m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_128_longs() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_128(seed);
+ for (int i = 0; i < 1000; i++) {
+ long val = rand.nextLong();
+ byte[] data = ByteBuffer.allocate(8).putLong(val).array();
+ // guava stores the hashcodes in little endian order
+ ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(data).asBytes());
+ buf.flip();
+ long gl1 = buf.getLong();
+ long gl2 = buf.getLong(8);
+ long[] hc = Murmur3.hash128(data, data.length, seed);
+ long m1 = hc[0];
+ long m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+ }
+ }
+
+ @Test
+ public void testHashCodesM3_128_double() {
+ int seed = 123;
+ Random rand = new Random(seed);
+ HashFunction hf = Hashing.murmur3_128(seed);
+ for (int i = 0; i < 1000; i++) {
+ double val = rand.nextDouble();
+ byte[] data = ByteBuffer.allocate(8).putDouble(val).array();
+ // guava stores the hashcodes in little endian order
+ ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+ buf.put(hf.hashBytes(data).asBytes());
+ buf.flip();
+ long gl1 = buf.getLong();
+ long gl2 = buf.getLong(8);
+ long[] hc = Murmur3.hash128(data, data.length, seed);
+ long m1 = hc[0];
+ long m2 = hc[1];
+ assertEquals(gl1, m1);
+ assertEquals(gl2, m2);
+ }
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
index e451143..00afdac 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
@@ -21,19 +21,6 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hive.common.util.HiveTestUtils;
-import org.junit.Before;
-import org.junit.Test;
-
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
@@ -49,6 +36,19 @@
import java.util.Map;
import java.util.Random;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.Before;
+import org.junit.Test;
+
public class TestFileDump {
Path workDir = new Path(System.getProperty("test.tmp.dir"));
@@ -303,4 +303,101 @@ public void testDictionaryThreshold() throws Exception {
checkOutput(outputFilename, workDir + File.separator + outputFilename);
}
+
+  /**
+   * Writes 21000 pseudo-random {@code MyRecord} rows to an ORC file with a bloom filter
+   * requested on column "s", runs {@code FileDump} with {@code --rowindex=3} while stdout
+   * is redirected to a file, and hands that file to {@code checkOutput} for comparison.
+   *
+   * @throws Exception if writing, dumping or comparing the output fails
+   */
+  @Test
+  public void testBloomFilter() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+        .fileSystem(fs)
+        .inspector(inspector)
+        .stripeSize(100000)
+        .compress(CompressionKind.ZLIB)
+        .bufferSize(10000)
+        .rowIndexStride(1000)
+        .bloomFilterColumns("s");
+    Writer writer = OrcFile.createWriter(testFilePath, options);
+    // fixed seed keeps the generated rows, and therefore the dump, reproducible
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    for(int i=0; i < 21000; ++i) {
+      writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
+          words[r1.nextInt(words.length)]));
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump-bloomfilter.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+    try {
+      // replace stdout and run command
+      System.setOut(new PrintStream(myOut));
+      FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"});
+      System.out.flush();
+    } finally {
+      // restore stdout first so a failing dump cannot leave it redirected for
+      // later tests, then release the file handle
+      System.setOut(origOut);
+      myOut.close();
+    }
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+
+  /**
+   * Writes 21000 pseudo-random {@code MyRecord} rows to an ORC file with a bloom filter
+   * requested on column "l" at a false-positive probability of 0.01, runs {@code FileDump}
+   * with {@code --rowindex=2} while stdout is redirected to a file, and hands that file to
+   * {@code checkOutput} for comparison.
+   *
+   * @throws Exception if writing, dumping or comparing the output fails
+   */
+  @Test
+  public void testBloomFilter2() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+        .fileSystem(fs)
+        .inspector(inspector)
+        .stripeSize(100000)
+        .compress(CompressionKind.ZLIB)
+        .bufferSize(10000)
+        .rowIndexStride(1000)
+        .bloomFilterColumns("l")
+        .bloomFilterFpp(0.01);
+    Writer writer = OrcFile.createWriter(testFilePath, options);
+    // fixed seed keeps the generated rows, and therefore the dump, reproducible
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    for(int i=0; i < 21000; ++i) {
+      writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
+          words[r1.nextInt(words.length)]));
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump-bloomfilter2.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+    try {
+      // replace stdout and run command
+      System.setOut(new PrintStream(myOut));
+      FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
+      System.out.flush();
+    } finally {
+      // restore stdout first so a failing dump cannot leave it redirected for
+      // later tests, then release the file handle
+      System.setOut(origOut);
+      myOut.close();
+    }
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 5c48d0b..cd1d645 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -515,6 +515,10 @@ public void testTimestamp() throws Exception {
Object row = rows.next(null);
assertEquals(tslist.get(idx++).getNanos(), ((TimestampWritable) row).getNanos());
}
+ assertEquals(1, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false};
+ boolean[] included = OrcUtils.includeColumns("", "ts", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
}
@Test
@@ -538,6 +542,19 @@ public void testStringAndBinaryStatistics() throws Exception {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
+ assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false, false, true};
+ boolean[] included = OrcUtils.includeColumns("string1", "bytes1,string1", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, false, false};
+ included = OrcUtils.includeColumns("", "bytes1,string1", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, false, false};
+ included = OrcUtils.includeColumns(null, "bytes1,string1", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(4, stats[0].getNumberOfValues());
@@ -634,6 +651,12 @@ public void testStripeLevelStats() throws Exception {
writer.close();
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
+
+ assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false, true, false};
+ boolean[] included = OrcUtils.includeColumns("int1", "int1,string1", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
Metadata metadata = reader.getMetadata();
int numStripes = metadata.getStripeStatistics().size();
assertEquals(3, numStripes);
@@ -672,7 +695,7 @@ public void testStripeLevelStats() throws Exception {
assertEquals(5000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum());
RecordReaderImpl recordReader = (RecordReaderImpl) reader.rows();
- OrcProto.RowIndex[] index = recordReader.readRowIndex(0);
+ OrcProto.RowIndex[] index = recordReader.readRowIndex(0, null).getRowGroupIndex();
assertEquals(3, index.length);
List items = index[1].getEntryList();
assertEquals(1, items.size());
@@ -682,7 +705,7 @@ public void testStripeLevelStats() throws Exception {
assertEquals(0, items.get(0).getPositions(2));
assertEquals(1,
items.get(0).getStatistics().getIntStatistics().getMinimum());
- index = recordReader.readRowIndex(1);
+ index = recordReader.readRowIndex(1, null).getRowGroupIndex();
assertEquals(3, index.length);
items = index[1].getEntryList();
assertEquals(2,
@@ -715,6 +738,44 @@ public void test1() throws Exception {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
+ assertEquals(24, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false, false, false, false, false,
+ false, false, false, false, false,
+ false, false, false, false, false,
+ false, false, false, false, false,
+ false, false, false, false};
+ boolean[] included = OrcUtils.includeColumns("",
+ "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, true, false, false, false,
+ false, false, false, false, true,
+ true, true, true, true, true,
+ false, false, false, false, true,
+ true, true, true, true};
+ included = OrcUtils.includeColumns("boolean1,string1,middle,map",
+ "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, true, false, false, false,
+ false, false, false, false, true,
+ true, true, true, true, true,
+ false, false, false, false, true,
+ true, true, true, true};
+ included = OrcUtils.includeColumns("boolean1,string1,middle,map",
+ "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, true, true, true, true,
+ true, true, true, true, true,
+ true, true, true, true, true,
+ true, true, true, true, true,
+ true, true, true, true};
+ included = OrcUtils.includeColumns(
+ "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map",
+ "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
Metadata metadata = reader.getMetadata();
// check the stats
@@ -1183,6 +1244,20 @@ public void testUnionAndTimestamp() throws Exception {
writer.close();
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
+
+ assertEquals(6, OrcUtils.getFlattenedColumnsCount(inspector));
+ boolean[] expected = new boolean[] {false, false, false, false, false, false};
+ boolean[] included = OrcUtils.includeColumns("", "time,union,decimal", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, true, false, false, false, true};
+ included = OrcUtils.includeColumns("time,decimal", "time,union,decimal", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
+ expected = new boolean[] {false, false, true, true, true, false};
+ included = OrcUtils.includeColumns("union", "time,union,decimal", inspector);
+ assertEquals(true, Arrays.equals(expected, included));
+
assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
assertEquals(5309, reader.getNumberOfRows());
DecimalColumnStatistics stats =
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
index a86d19f..db0ce0e 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
@@ -320,27 +320,27 @@ public void testPredEvalWithIntStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10, 100), pred, null));
}
@Test
@@ -348,27 +348,27 @@ public void testPredEvalWithDoubleStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred));
+ RecordReaderImpl.evaluatePredicate(createDoubleStats(10.0, 100.0), pred, null));
}
@Test
@@ -376,27 +376,27 @@ public void testPredEvalWithStringStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 100, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 100.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "100", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(100), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(100), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("10", "1000"), pred, null));
}
@Test
@@ -404,57 +404,57 @@ public void testPredEvalWithDateStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "1970-01-11", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15.1", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "__a15__1", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "2000-01-16", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "1970-01-16", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(150), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred));
+ RecordReaderImpl.evaluatePredicate(createDateStats(10, 100), pred, null));
}
@@ -463,27 +463,27 @@ public void testPredEvalWithDecimalStats() throws Exception {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, null));
pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred));
+ RecordReaderImpl.evaluatePredicate(createDecimalStats("10.0", "100.0"), pred, null));
}
@@ -493,17 +493,17 @@ public void testEquals() throws Exception {
(PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred, null));
}
@Test
@@ -512,17 +512,17 @@ public void testNullSafeEquals() throws Exception {
(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred, null));
}
@Test
@@ -531,15 +531,15 @@ public void testLessThan() throws Exception {
(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), lessThan));
+ RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), lessThan, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), lessThan));
+ RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), lessThan, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), lessThan));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), lessThan, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), lessThan));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), lessThan, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), lessThan));
+ RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), lessThan, null));
}
@Test
@@ -548,15 +548,15 @@ public void testLessThanEquals() throws Exception {
(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred, null));
}
@Test
@@ -568,13 +568,13 @@ public void testIn() throws Exception {
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
"x", null, args);
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 20L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(20L, 20L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(30L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(30L, 30L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred, null));
}
@Test
@@ -586,19 +586,19 @@ public void testBetween() throws Exception {
(PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.INTEGER,
"x", null, args);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(0L, 5L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(0L, 5L), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(30L, 40L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(30L, 40L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(5L, 15L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(5L, 15L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(15L, 25L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(15L, 25L), pred, null));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(5L, 25L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(5L, 25L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(10L, 20L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(10L, 20L), pred, null));
assertEquals(TruthValue.YES,
- RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred, null));
}
@Test
@@ -607,7 +607,7 @@ public void testIsNull() throws Exception {
(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.INTEGER,
"x", null, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
+ RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred, null));
}
@@ -617,17 +617,17 @@ public void testEqualsWithNullInStats() throws Exception {
(PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null)); // before
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred, null)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null)); // same
}
@Test
@@ -636,17 +636,17 @@ public void testNullSafeEqualsWithNullInStats() throws Exception {
(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null)); // before
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred, null)); // max
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null)); // same
}
@Test
@@ -655,17 +655,17 @@ public void testLessThanWithNullInStats() throws Exception {
(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null)); // before
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred, null)); // max
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.NO_NULL, // min, same stats
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null));
}
@Test
@@ -674,17 +674,17 @@ public void testLessThanEqualsWithNullInStats() throws Exception {
(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null)); // before
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred, null)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null)); // same
}
@Test
@@ -696,17 +696,17 @@ public void testInWithNullInStats() throws Exception {
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
"x", null, args);
assertEquals(TruthValue.NO_NULL, // before & after
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred)); // max
+ RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred, null)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null)); // same
}
@Test
@@ -718,31 +718,31 @@ public void testBetweenWithNullInStats() throws Exception {
(PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING,
"x", null, args);
assertEquals(TruthValue.YES_NULL, // before & after
- RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred, null));
assertEquals(TruthValue.YES_NULL, // before & max
- RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred, null));
assertEquals(TruthValue.NO_NULL, // before & before
- RecordReaderImpl.evaluatePredicate(createStringStats("h", "g", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("h", "g", true), pred, null));
assertEquals(TruthValue.YES_NO_NULL, // before & min
- RecordReaderImpl.evaluatePredicate(createStringStats("f", "g", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("f", "g", true), pred, null));
assertEquals(TruthValue.YES_NO_NULL, // before & middle
- RecordReaderImpl.evaluatePredicate(createStringStats("e", "g", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("e", "g", true), pred, null));
assertEquals(TruthValue.YES_NULL, // min & after
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "e", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "e", true), pred, null));
assertEquals(TruthValue.YES_NULL, // min & max
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "f", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "f", true), pred, null));
assertEquals(TruthValue.YES_NO_NULL, // min & middle
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "g", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "g", true), pred, null));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred, null)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("a", "c", true), pred)); // max
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "c", true), pred, null)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred, null)); // middle
assertEquals(TruthValue.YES_NULL, // min & after, same stats
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred, null));
}
@Test
@@ -751,9 +751,9 @@ public void testIsNullWithNullInStats() throws Exception {
(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING,
"x", null, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred, null));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", false), pred));
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", false), pred, null));
}
@Test
diff --git ql/src/test/resources/orc-file-dump-bloomfilter.out ql/src/test/resources/orc-file-dump-bloomfilter.out
new file mode 100644
index 0000000..6b4ca33
--- /dev/null
+++ ql/src/test/resources/orc-file-dump-bloomfilter.out
@@ -0,0 +1,177 @@
+Structure for TestFileDump.testDump.orc
+File Version: 0.12 with HIVE_8732
+Rows: 21000
+Compression: ZLIB
+Compression size: 10000
+Type: struct
+
+Stripe Statistics:
+ Stripe 1:
+ Column 0: count: 5000 hasNull: false
+ Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826
+ Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961
+ Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280
+ Stripe 2:
+ Column 0: count: 5000 hasNull: false
+ Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427
+ Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839
+ Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504
+ Stripe 3:
+ Column 0: count: 5000 hasNull: false
+ Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551
+ Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266
+ Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641
+ Stripe 4:
+ Column 0: count: 5000 hasNull: false
+ Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236
+ Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406
+ Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470
+ Stripe 5:
+ Column 0: count: 1000 hasNull: false
+ Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363
+ Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476
+ Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866
+
+File Statistics:
+ Column 0: count: 21000 hasNull: false
+ Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403
+ Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266
+ Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
+
+Stripes:
+ Stripe: offset: 3 data: 63765 rows: 5000 tail: 86 index: 751
+ Stream: column 0 section ROW_INDEX start: 3 length 17
+ Stream: column 1 section ROW_INDEX start: 20 length 164
+ Stream: column 2 section ROW_INDEX start: 184 length 173
+ Stream: column 3 section ROW_INDEX start: 357 length 87
+ Stream: column 3 section BLOOM_FILTER start: 444 length 310
+ Stream: column 1 section DATA start: 754 length 20029
+ Stream: column 2 section DATA start: 20783 length 40035
+ Stream: column 3 section DATA start: 60818 length 3543
+ Stream: column 3 section LENGTH start: 64361 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 64386 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45
+ Bloom filters for column 3:
+ Entry 0: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 1: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 2: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 3: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 4: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Stripe level merge: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Stripe: offset: 64605 data: 63754 rows: 5000 tail: 86 index: 743
+ Stream: column 0 section ROW_INDEX start: 64605 length 17
+ Stream: column 1 section ROW_INDEX start: 64622 length 162
+ Stream: column 2 section ROW_INDEX start: 64784 length 171
+ Stream: column 3 section ROW_INDEX start: 64955 length 83
+ Stream: column 3 section BLOOM_FILTER start: 65038 length 310
+ Stream: column 1 section DATA start: 65348 length 20029
+ Stream: column 2 section DATA start: 85377 length 40035
+ Stream: column 3 section DATA start: 125412 length 3532
+ Stream: column 3 section LENGTH start: 128944 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 128969 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88
+ Bloom filters for column 3:
+ Entry 0: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 1: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 2: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 3: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 4: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Stripe level merge: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Stripe: offset: 129188 data: 63766 rows: 5000 tail: 86 index: 747
+ Stream: column 0 section ROW_INDEX start: 129188 length 17
+ Stream: column 1 section ROW_INDEX start: 129205 length 159
+ Stream: column 2 section ROW_INDEX start: 129364 length 171
+ Stream: column 3 section ROW_INDEX start: 129535 length 90
+ Stream: column 3 section BLOOM_FILTER start: 129625 length 310
+ Stream: column 1 section DATA start: 129935 length 20029
+ Stream: column 2 section DATA start: 149964 length 40035
+ Stream: column 3 section DATA start: 189999 length 3544
+ Stream: column 3 section LENGTH start: 193543 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 193568 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43
+ Bloom filters for column 3:
+ Entry 0: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 1: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 2: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 3: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 4: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Stripe level merge: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Stripe: offset: 193787 data: 63796 rows: 5000 tail: 86 index: 750
+ Stream: column 0 section ROW_INDEX start: 193787 length 17
+ Stream: column 1 section ROW_INDEX start: 193804 length 162
+ Stream: column 2 section ROW_INDEX start: 193966 length 170
+ Stream: column 3 section ROW_INDEX start: 194136 length 91
+ Stream: column 3 section BLOOM_FILTER start: 194227 length 310
+ Stream: column 1 section DATA start: 194537 length 20029
+ Stream: column 2 section DATA start: 214566 length 40035
+ Stream: column 3 section DATA start: 254601 length 3574
+ Stream: column 3 section LENGTH start: 258175 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 258200 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131
+ Bloom filters for column 3:
+ Entry 0: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 1: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 2: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 3: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Entry 4: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Stripe level merge: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Stripe: offset: 258419 data: 12940 rows: 1000 tail: 78 index: 394
+ Stream: column 0 section ROW_INDEX start: 258419 length 12
+ Stream: column 1 section ROW_INDEX start: 258431 length 38
+ Stream: column 2 section ROW_INDEX start: 258469 length 41
+ Stream: column 3 section ROW_INDEX start: 258510 length 40
+ Stream: column 3 section BLOOM_FILTER start: 258550 length 263
+ Stream: column 1 section DATA start: 258813 length 4007
+ Stream: column 2 section DATA start: 262820 length 8007
+ Stream: column 3 section DATA start: 270827 length 768
+ Stream: column 3 section LENGTH start: 271595 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 271620 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
+ Bloom filters for column 3:
+ Entry 0: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+ Stripe level merge: n: 1000 fpp: 0.05 k: 4 bitCount: 6272 popCount: 137 loadFactor: 0.0218 expectedFpp: 2.27645E-7
+
+File length: 272376 bytes
+Padding length: 0 bytes
+Padding ratio: 0%
diff --git ql/src/test/resources/orc-file-dump-bloomfilter2.out ql/src/test/resources/orc-file-dump-bloomfilter2.out
new file mode 100644
index 0000000..d053a70
--- /dev/null
+++ ql/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -0,0 +1,177 @@
+Structure for TestFileDump.testDump.orc
+File Version: 0.12 with HIVE_8732
+Rows: 21000
+Compression: ZLIB
+Compression size: 10000
+Type: struct
+
+Stripe Statistics:
+ Stripe 1:
+ Column 0: count: 5000 hasNull: false
+ Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826
+ Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961
+ Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280
+ Stripe 2:
+ Column 0: count: 5000 hasNull: false
+ Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427
+ Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839
+ Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504
+ Stripe 3:
+ Column 0: count: 5000 hasNull: false
+ Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551
+ Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266
+ Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641
+ Stripe 4:
+ Column 0: count: 5000 hasNull: false
+ Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236
+ Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406
+ Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470
+ Stripe 5:
+ Column 0: count: 1000 hasNull: false
+ Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363
+ Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476
+ Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866
+
+File Statistics:
+ Column 0: count: 21000 hasNull: false
+ Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403
+ Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266
+ Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
+
+Stripes:
+ Stripe: offset: 3 data: 63765 rows: 5000 tail: 85 index: 7502
+ Stream: column 0 section ROW_INDEX start: 3 length 17
+ Stream: column 1 section ROW_INDEX start: 20 length 164
+ Stream: column 2 section ROW_INDEX start: 184 length 173
+ Stream: column 2 section BLOOM_FILTER start: 357 length 7061
+ Stream: column 3 section ROW_INDEX start: 7418 length 87
+ Stream: column 1 section DATA start: 7505 length 20029
+ Stream: column 2 section DATA start: 27534 length 40035
+ Stream: column 3 section DATA start: 67569 length 3543
+ Stream: column 3 section LENGTH start: 71112 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 71137 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 20006,8686,416
+ Bloom filters for column 2:
+ Entry 0: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4926 loadFactor: 0.5131 expectedFpp: 0.009366174
+ Entry 1: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4963 loadFactor: 0.517 expectedFpp: 0.009869866
+ Entry 2: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4959 loadFactor: 0.5166 expectedFpp: 0.009814321
+ Entry 3: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4924 loadFactor: 0.5129 expectedFpp: 0.00933959
+ Entry 4: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4973 loadFactor: 0.518 expectedFpp: 0.0100099165
+ Stripe level merge: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 9333 loadFactor: 0.9722 expectedFpp: 0.82082444
+ Stripe: offset: 71355 data: 63754 rows: 5000 tail: 85 index: 7503
+ Stream: column 0 section ROW_INDEX start: 71355 length 17
+ Stream: column 1 section ROW_INDEX start: 71372 length 162
+ Stream: column 2 section ROW_INDEX start: 71534 length 171
+ Stream: column 2 section BLOOM_FILTER start: 71705 length 7070
+ Stream: column 3 section ROW_INDEX start: 78775 length 83
+ Stream: column 1 section DATA start: 78858 length 20029
+ Stream: column 2 section DATA start: 98887 length 40035
+ Stream: column 3 section DATA start: 138922 length 3532
+ Stream: column 3 section LENGTH start: 142454 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 142479 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 20006,8686,416
+ Bloom filters for column 2:
+ Entry 0: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4928 loadFactor: 0.5133 expectedFpp: 0.009392824
+ Entry 1: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4963 loadFactor: 0.517 expectedFpp: 0.009869866
+ Entry 2: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4965 loadFactor: 0.5172 expectedFpp: 0.009897739
+ Entry 3: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4937 loadFactor: 0.5143 expectedFpp: 0.009513565
+ Entry 4: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4954 loadFactor: 0.516 expectedFpp: 0.009745263
+ Stripe level merge: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 9338 loadFactor: 0.9727 expectedFpp: 0.82390755
+ Stripe: offset: 142697 data: 63766 rows: 5000 tail: 85 index: 7507
+ Stream: column 0 section ROW_INDEX start: 142697 length 17
+ Stream: column 1 section ROW_INDEX start: 142714 length 159
+ Stream: column 2 section ROW_INDEX start: 142873 length 171
+ Stream: column 2 section BLOOM_FILTER start: 143044 length 7070
+ Stream: column 3 section ROW_INDEX start: 150114 length 90
+ Stream: column 1 section DATA start: 150204 length 20029
+ Stream: column 2 section DATA start: 170233 length 40035
+ Stream: column 3 section DATA start: 210268 length 3544
+ Stream: column 3 section LENGTH start: 213812 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 213837 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 20006,8686,416
+ Bloom filters for column 2:
+ Entry 0: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4994 loadFactor: 0.5202 expectedFpp: 0.010309587
+ Entry 1: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4954 loadFactor: 0.516 expectedFpp: 0.009745263
+ Entry 2: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4949 loadFactor: 0.5155 expectedFpp: 0.009676614
+ Entry 3: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4992 loadFactor: 0.52 expectedFpp: 0.010280714
+ Entry 4: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4987 loadFactor: 0.5195 expectedFpp: 0.010208852
+ Stripe level merge: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 9335 loadFactor: 0.9724 expectedFpp: 0.8220564
+ Stripe: offset: 214055 data: 63796 rows: 5000 tail: 85 index: 7510
+ Stream: column 0 section ROW_INDEX start: 214055 length 17
+ Stream: column 1 section ROW_INDEX start: 214072 length 162
+ Stream: column 2 section ROW_INDEX start: 214234 length 170
+ Stream: column 2 section BLOOM_FILTER start: 214404 length 7070
+ Stream: column 3 section ROW_INDEX start: 221474 length 91
+ Stream: column 1 section DATA start: 221565 length 20029
+ Stream: column 2 section DATA start: 241594 length 40035
+ Stream: column 3 section DATA start: 281629 length 3574
+ Stream: column 3 section LENGTH start: 285203 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 285228 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 20006,8686,416
+ Bloom filters for column 2:
+ Entry 0: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4987 loadFactor: 0.5195 expectedFpp: 0.010208852
+ Entry 1: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4970 loadFactor: 0.5177 expectedFpp: 0.009967729
+ Entry 2: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4958 loadFactor: 0.5165 expectedFpp: 0.009800472
+ Entry 3: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4966 loadFactor: 0.5173 expectedFpp: 0.009911705
+ Entry 4: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4977 loadFactor: 0.5184 expectedFpp: 0.010066416
+ Stripe level merge: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 9323 loadFactor: 0.9711 expectedFpp: 0.81468755
+ Stripe: offset: 285446 data: 12940 rows: 1000 tail: 78 index: 1606
+ Stream: column 0 section ROW_INDEX start: 285446 length 12
+ Stream: column 1 section ROW_INDEX start: 285458 length 38
+ Stream: column 2 section ROW_INDEX start: 285496 length 41
+ Stream: column 2 section BLOOM_FILTER start: 285537 length 1475
+ Stream: column 3 section ROW_INDEX start: 287012 length 40
+ Stream: column 1 section DATA start: 287052 length 4007
+ Stream: column 2 section DATA start: 291059 length 8007
+ Stream: column 3 section DATA start: 299066 length 768
+ Stream: column 3 section LENGTH start: 299834 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 299859 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT_V2
+ Encoding column 2: DIRECT_V2
+ Encoding column 3: DICTIONARY_V2[35]
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0
+ Bloom filters for column 2:
+ Entry 0: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4999 loadFactor: 0.5207 expectedFpp: 0.010382057
+ Stripe level merge: n: 1000 fpp: 0.01 k: 7 bitCount: 9600 popCount: 4999 loadFactor: 0.5207 expectedFpp: 0.010382057
+
+File length: 300614 bytes
+Padding length: 0 bytes
+Padding ratio: 0%
diff --git ql/src/test/resources/orc-file-dump-dictionary-threshold.out ql/src/test/resources/orc-file-dump-dictionary-threshold.out
index ac74e51..13e316e 100644
--- ql/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ ql/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -52,24 +52,24 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DIRECT_V2
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2132329551 max: 2145911404 sum: 61941331718 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2138433136 max: 2145210552 sum: 14574030042 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2147115959 max: 2137805337 sum: -2032493169 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2137828953 max: 2145877119 sum: -3167202608 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2146452517 max: 2142394906 sum: 88361503212 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9206837518492372266 max: 9169230975203934579 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9188878639954124284 max: 9213664245516510068 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9211329013123260308 max: 9217851628057711416 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9185745718227889962 max: 9181722705210917931 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9216505819108477308 max: 9196474183833079923 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness,-230 max: worst-54-290-346-648-908-996 sum: 18442 positions: 0,0,0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966 sum: 46338 positions: 2777,8442,0,695,18
- Entry 2:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660 sum: 75448 positions: 13595,4780,0,1554,14
- Entry 3:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 sum: 104868 positions: 31432,228,0,2372,90
- Entry 4:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 sum: 136158 positions: 54111,5096,0,3354,108
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2132329551 max: 2145911404 sum: 61941331718 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2138433136 max: 2145210552 sum: 14574030042 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2147115959 max: 2137805337 sum: -2032493169 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2137828953 max: 2145877119 sum: -3167202608 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2146452517 max: 2142394906 sum: 88361503212 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9206837518492372266 max: 9169230975203934579 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9188878639954124284 max: 9213664245516510068 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9211329013123260308 max: 9217851628057711416 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9185745718227889962 max: 9181722705210917931 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9216505819108477308 max: 9196474183833079923 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness,-230 max: worst-54-290-346-648-908-996 sum: 18442 positions: 0,0,0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966 sum: 46338 positions: 2777,8442,0,695,18
+ Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660 sum: 75448 positions: 13595,4780,0,1554,14
+ Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 sum: 104868 positions: 31432,228,0,2372,90
+ Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 sum: 136158 positions: 54111,5096,0,3354,108
Stripe: offset: 151897 data: 336358 rows: 5000 tail: 69 index: 954
Stream: column 0 section ROW_INDEX start: 151897 length 17
Stream: column 1 section ROW_INDEX start: 151914 length 153
@@ -83,24 +83,24 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DIRECT_V2
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -50979197646 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2143569489 max: 2141223179 sum: 22810066834 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2140649392 max: 2146301701 sum: -31694882346 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2147390285 max: 2146299933 sum: 79371934221 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2145928262 max: 2147224606 sum: -34469378822 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9222178666167296739 max: 9191250610515369723 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9220148577547102875 max: 9213945522531717278 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9220818777591257749 max: 9221301751385928177 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9220031433030423388 max: 9207856144487414148 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9201438531577205959 max: 9212462124593119846 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726 sum: 166320 positions: 0,0,0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994 sum: 193436 positions: 37112,6320,0,967,90
- Entry 2:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988 sum: 224740 positions: 80822,9756,0,1945,222
- Entry 3:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984 sum: 252094 positions: 137149,4496,0,3268,48
- Entry 4:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 sum: 281404 positions: 197972,6590,0,4064,342
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -50979197646 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2143569489 max: 2141223179 sum: 22810066834 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2140649392 max: 2146301701 sum: -31694882346 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2147390285 max: 2146299933 sum: 79371934221 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2145928262 max: 2147224606 sum: -34469378822 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9222178666167296739 max: 9191250610515369723 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9220148577547102875 max: 9213945522531717278 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9220818777591257749 max: 9221301751385928177 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9220031433030423388 max: 9207856144487414148 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9201438531577205959 max: 9212462124593119846 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726 sum: 166320 positions: 0,0,0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994 sum: 193436 positions: 37112,6320,0,967,90
+ Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988 sum: 224740 positions: 80822,9756,0,1945,222
+ Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984 sum: 252094 positions: 137149,4496,0,3268,48
+ Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 sum: 281404 positions: 197972,6590,0,4064,342
Stripe: offset: 489278 data: 558031 rows: 5000 tail: 69 index: 1181
Stream: column 0 section ROW_INDEX start: 489278 length 17
Stream: column 1 section ROW_INDEX start: 489295 length 166
@@ -114,24 +114,24 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DIRECT_V2
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2138229212 max: 2144818981 sum: -22823642812 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2145842720 max: 2144179881 sum: -12562754334 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2143045885 max: 2146718321 sum: 82993638644 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2144745617 max: 2146570474 sum: 25138722367 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2140127150 max: 2135081620 sum: 68346511655 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9204340807292138409 max: 9208698732685326961 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9221963099397084326 max: 9222722740629726770 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9210480084701091299 max: 9207767402467343058 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9195038026813631215 max: 9199201928563274421 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9215483580266514322 max: 9220102792864959501 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876 sum: 313880 positions: 0,0,0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964 sum: 349542 positions: 80352,3880,0,1097,28
- Entry 2:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976 sum: 386538 positions: 170641,3422,0,2077,162
- Entry 3:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766 sum: 421660 positions: 268420,9960,0,3369,16
- Entry 4:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 sum: 453606 positions: 377916,1620,0,4041,470
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2138229212 max: 2144818981 sum: -22823642812 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2145842720 max: 2144179881 sum: -12562754334 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2143045885 max: 2146718321 sum: 82993638644 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2144745617 max: 2146570474 sum: 25138722367 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2140127150 max: 2135081620 sum: 68346511655 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9204340807292138409 max: 9208698732685326961 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9221963099397084326 max: 9222722740629726770 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9210480084701091299 max: 9207767402467343058 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9195038026813631215 max: 9199201928563274421 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9215483580266514322 max: 9220102792864959501 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876 sum: 313880 positions: 0,0,0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964 sum: 349542 positions: 80352,3880,0,1097,28
+ Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976 sum: 386538 positions: 170641,3422,0,2077,162
+ Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766 sum: 421660 positions: 268420,9960,0,3369,16
+ Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 sum: 453606 positions: 377916,1620,0,4041,470
Stripe: offset: 1048559 data: 792850 rows: 5000 tail: 69 index: 1369
Stream: column 0 section ROW_INDEX start: 1048559 length 17
Stream: column 1 section ROW_INDEX start: 1048576 length 153
@@ -145,24 +145,24 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DIRECT_V2
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2145319330 max: 2146998132 sum: -50856753363 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2134288866 max: 2147453086 sum: -17911019023 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2139010804 max: 2144727593 sum: -24993151857 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2145378214 max: 2144098933 sum: -18055164052 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2140494429 max: 2144595861 sum: -41863916235 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9172774601303513941 max: 9212917101275642143 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9218164880949195469 max: 9222919052987871506 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9196276654247395117 max: 9210639275226058005 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9197393848859294562 max: 9208134757538374043 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610 sum: 492916 positions: 0,0,0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936 sum: 527290 positions: 126968,2916,0,1077,140
- Entry 2:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878 sum: 568274 positions: 263111,206,0,1926,462
- Entry 3:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788 sum: 594578 positions: 407371,8480,0,3444,250
- Entry 4:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 sum: 631944 positions: 562094,3058,0,4643,292
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2145319330 max: 2146998132 sum: -50856753363 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2134288866 max: 2147453086 sum: -17911019023 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2139010804 max: 2144727593 sum: -24993151857 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2145378214 max: 2144098933 sum: -18055164052 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2140494429 max: 2144595861 sum: -41863916235 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9172774601303513941 max: 9212917101275642143 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9218164880949195469 max: 9222919052987871506 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9196276654247395117 max: 9210639275226058005 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9197393848859294562 max: 9208134757538374043 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610 sum: 492916 positions: 0,0,0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936 sum: 527290 positions: 126968,2916,0,1077,140
+ Entry 2: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878 sum: 568274 positions: 263111,206,0,1926,462
+ Entry 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788 sum: 594578 positions: 407371,8480,0,3444,250
+ Entry 4: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 sum: 631944 positions: 562094,3058,0,4643,292
Stripe: offset: 1842847 data: 188033 rows: 1000 tail: 67 index: 841
Stream: column 0 section ROW_INDEX start: 1842847 length 12
Stream: column 1 section ROW_INDEX start: 1842859 length 38
@@ -176,12 +176,12 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DIRECT_V2
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2143595397 max: 2136858458 sum: -22999664100 positions: 0,0,0
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9212379634781416464 max: 9197412874152820822 positions: 0,0,0
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 positions: 0,0,0,0,0
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2143595397 max: 2136858458 sum: -22999664100 positions: 0,0,0
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9212379634781416464 max: 9197412874152820822 positions: 0,0,0
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 positions: 0,0,0,0,0
File length: 2033643 bytes
Padding length: 0 bytes
diff --git ql/src/test/resources/orc-file-dump.out ql/src/test/resources/orc-file-dump.out
index cfeea24..2f5962b 100644
--- ql/src/test/resources/orc-file-dump.out
+++ ql/src/test/resources/orc-file-dump.out
@@ -53,24 +53,24 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2145365268 max: 2135491313 sum: 7521792925 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2139452528 max: 2147223299 sum: -12923774313 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2142420586 max: 2143898386 sum: -25521983511 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2137233441 max: 2144267163 sum: 40993386199 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -9553628474 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149
- Entry 2:count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3
- Entry 3:count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32
- Entry 4:count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2145365268 max: 2135491313 sum: 7521792925 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2139452528 max: 2147223299 sum: -12923774313 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2142420586 max: 2143898386 sum: -25521983511 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2137233441 max: 2144267163 sum: 40993386199 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -9553628474 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45
Stripe: offset: 64288 data: 63754 rows: 5000 tail: 79 index: 433
Stream: column 0 section ROW_INDEX start: 64288 length 17
Stream: column 1 section ROW_INDEX start: 64305 length 162
@@ -85,24 +85,24 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2143799121 max: 2145249879 sum: -6966266181 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2146733128 max: 2147001622 sum: -35930106333 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2144302712 max: 2146299933 sum: 6944230435 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2145172948 max: 2144335014 sum: -29624404959 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2146428427 max: 2144067253 sum: 65584220465 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12
- Entry 2:count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70
- Entry 3:count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43
- Entry 4:count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2143799121 max: 2145249879 sum: -6966266181 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2146733128 max: 2147001622 sum: -35930106333 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2144302712 max: 2146299933 sum: 6944230435 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2145172948 max: 2144335014 sum: -29624404959 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2146428427 max: 2144067253 sum: 65584220465 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88
Stripe: offset: 128554 data: 63766 rows: 5000 tail: 79 index: 437
Stream: column 0 section ROW_INDEX start: 128554 length 17
Stream: column 1 section ROW_INDEX start: 128571 length 159
@@ -117,24 +117,24 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2146993718 max: 2144179881 sum: -7829543271 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2144095505 max: 2144883384 sum: 51623839692 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2144113995 max: 2143773575 sum: 56574412741 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2146954065 max: 2146794873 sum: 4336083432 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2135511523 max: 2147378179 sum: 27955949957 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174
- Entry 2:count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69
- Entry 3:count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194
- Entry 4:count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2146993718 max: 2144179881 sum: -7829543271 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2144095505 max: 2144883384 sum: 51623839692 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2144113995 max: 2143773575 sum: 56574412741 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2146954065 max: 2146794873 sum: 4336083432 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2135511523 max: 2147378179 sum: 27955949957 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43
Stripe: offset: 192836 data: 63796 rows: 5000 tail: 79 index: 440
Stream: column 0 section ROW_INDEX start: 192836 length 17
Stream: column 1 section ROW_INDEX start: 192853 length 162
@@ -149,24 +149,24 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2141355639 max: 2145520931 sum: 2726719912 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -2138324170 max: 2140167376 sum: -23606674002 positions: 0,2050,488
- Entry 2:count: 1000 hasNull: false min: -2146658006 max: 2144329742 sum: -41530109703 positions: 0,6150,464
- Entry 3:count: 1000 hasNull: false min: -2144207593 max: 2139456355 sum: 13559842458 positions: 10003,250,440
- Entry 4:count: 1000 hasNull: false min: -2145744719 max: 2145417153 sum: 57383770571 positions: 10003,4350,416
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 0,4098,488
- Entry 2:count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 10003,2294,464
- Entry 3:count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20006,490,440
- Entry 4:count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 20006,8686,416
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0
- Entry 1:count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431
- Entry 2:count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52
- Entry 3:count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104
- Entry 4:count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2141355639 max: 2145520931 sum: 2726719912 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -2138324170 max: 2140167376 sum: -23606674002 positions: 0,2050,488
+ Entry 2: count: 1000 hasNull: false min: -2146658006 max: 2144329742 sum: -41530109703 positions: 0,6150,464
+ Entry 3: count: 1000 hasNull: false min: -2144207593 max: 2139456355 sum: 13559842458 positions: 10003,250,440
+ Entry 4: count: 1000 hasNull: false min: -2145744719 max: 2145417153 sum: 57383770571 positions: 10003,4350,416
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 0,4098,488
+ Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 10003,2294,464
+ Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20006,490,440
+ Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 20006,8686,416
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0
+ Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431
+ Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52
+ Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104
+ Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131
Stripe: offset: 257151 data: 12940 rows: 1000 tail: 71 index: 131
Stream: column 0 section ROW_INDEX start: 257151 length 12
Stream: column 1 section ROW_INDEX start: 257163 length 38
@@ -181,12 +181,12 @@ Stripes:
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
Encoding column 3: DICTIONARY_V2[35]
- Row group index column 1:
- Entry 0:count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 positions: 0,0,0
- Row group index column 2:
- Entry 0:count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0
- Row group index column 3:
- Entry 0:count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
+ Row group indices for column 1:
+ Entry 0: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 positions: 0,0,0
+ Row group indices for column 2:
+ Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0
+ Row group indices for column 3:
+ Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
File length: 270838 bytes
Padding length: 0 bytes