diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
index 6344a66..95f8cc8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
@@ -118,8 +118,8 @@ *
  * <li>8 bits for lower run length bits</li>
  * </ul>
  * </li>
- * <li>Base value - encoded as varint</li>
- * <li>Delta base - encoded as varint</li>
+ * <li>Base value - zigzag encoded value written as varint</li>
+ * <li>Delta base - zigzag encoded value written as varint</li>
  * <li>Delta blob - only positive values. monotonicity and orderness are decided
  * based on the sign of the base value and delta base</li>
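The two `+` lines above clarify that the delta run's base value and delta base are zigzag encoded before being written as varints. For reference, here is a minimal sketch of the standard zigzag mapping, which turns small magnitudes of either sign into small unsigned numbers so they fit in short varints; the class and method names are illustrative and are not the helpers used by RunLengthIntegerWriterV2.

```java
// Illustrative only: zigzag maps signed longs to unsigned-looking longs so that
// values close to zero (positive or negative) encode into few varint bytes.
public class ZigZagSketch {
  // 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, 2 -> 4, ...
  static long zigzagEncode(long value) {
    return (value << 1) ^ (value >> 63);     // arithmetic shift propagates the sign bit
  }

  static long zigzagDecode(long encoded) {
    return (encoded >>> 1) ^ -(encoded & 1); // logical shift, then restore the sign
  }

  public static void main(String[] args) {
    for (long v : new long[]{0, -1, 1, -2, 123, -512}) {
      long e = zigzagEncode(v);
      System.out.println(v + " -> " + e + " -> " + zigzagDecode(e));
    }
  }
}
```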
  • * @@ -472,29 +472,28 @@ private void determineEncoding() { // invariant - subtracting any number from any other in the literals after // this point won't overflow + // if min is equal to max then the delta is 0, this condition happens for + // fixed values run >10 which cannot be encoded with SHORT_REPEAT + if (min == max) { + assert isFixedDelta : min + "==" + max + + ", isFixedDelta cannot be false"; + assert currDelta == 0 : min + "==" + max + ", currDelta should be zero"; + fixedDelta = 0; + encoding = EncodingType.DELTA; + return; + } + + if (isFixedDelta) { + assert currDelta == initialDelta + : "currDelta should be equal to initialDelta for fixed delta encoding"; + encoding = EncodingType.DELTA; + fixedDelta = currDelta; + return; + } + // if initialDelta is 0 then we cannot delta encode as we cannot identify // the sign of deltas (increasing or decreasing) if (initialDelta != 0) { - - // if min is equal to max then the delta is 0, this condition happens for - // fixed values run >10 which cannot be encoded with SHORT_REPEAT - if (min == max) { - assert isFixedDelta : min + "==" + max + - ", isFixedDelta cannot be false"; - assert currDelta == 0 : min + "==" + max + ", currDelta should be zero"; - fixedDelta = 0; - encoding = EncodingType.DELTA; - return; - } - - if (isFixedDelta) { - assert currDelta == initialDelta - : "currDelta should be equal to initialDelta for fixed delta encoding"; - encoding = EncodingType.DELTA; - fixedDelta = currDelta; - return; - } - // stores the number of bits required for packing delta blob in // delta encoding bitsDeltaMax = utils.findClosestNumBits(deltaMax); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 146f5b1..2992f3c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -1895,9 +1895,9 @@ public void testMemoryManagementV12() throws Exception { stripe.getDataLength() < 5000); } // with HIVE-7832, the dictionaries will be disabled after writing the first - // stripe as there are too many distinct values. Hence only 4 stripes as + // stripe as there are too many distinct values. Hence only 3 stripes as // compared to 25 stripes in version 0.11 (above test case) - assertEquals(4, i); + assertEquals(3, i); assertEquals(2500, reader.getNumberOfRows()); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig deleted file mode 100644 index 15ee24c..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig +++ /dev/null @@ -1,1150 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.orc; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.common.ValidReadTxnList; -import org.apache.hadoop.hive.ql.io.AcidOutputFormat; -import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.io.RecordIdentifier; -import org.apache.hadoop.hive.ql.io.RecordUpdater; -import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.OriginalReaderPair; -import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey; -import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderPair; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.Reporter; -import org.junit.Test; -import org.mockito.MockSettings; -import org.mockito.Mockito; - -import java.io.File; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; - -public class TestOrcRawRecordMerger { - - private static final Logger LOG = LoggerFactory.getLogger(TestOrcRawRecordMerger.class); -//todo: why is statementId -1? 
- @Test - public void testOrdering() throws Exception { - ReaderKey left = new ReaderKey(100, 200, 1200, 300); - ReaderKey right = new ReaderKey(); - right.setValues(100, 200, 1000, 200,1); - assertTrue(right.compareTo(left) < 0); - assertTrue(left.compareTo(right) > 0); - assertEquals(false, left.equals(right)); - left.set(right); - assertTrue(right.compareTo(left) == 0); - assertEquals(true, right.equals(left)); - right.setRowId(2000); - assertTrue(right.compareTo(left) > 0); - left.setValues(1, 2, 3, 4,-1); - right.setValues(100, 2, 3, 4,-1); - assertTrue(left.compareTo(right) < 0); - assertTrue(right.compareTo(left) > 0); - left.setValues(1, 2, 3, 4,-1); - right.setValues(1, 100, 3, 4,-1); - assertTrue(left.compareTo(right) < 0); - assertTrue(right.compareTo(left) > 0); - left.setValues(1, 2, 3, 100,-1); - right.setValues(1, 2, 3, 4,-1); - assertTrue(left.compareTo(right) < 0); - assertTrue(right.compareTo(left) > 0); - - // ensure that we are consistent when comparing to the base class - RecordIdentifier ri = new RecordIdentifier(1, 2, 3); - assertEquals(1, ri.compareTo(left)); - assertEquals(-1, left.compareTo(ri)); - assertEquals(false, ri.equals(left)); - assertEquals(false, left.equals(ri)); - } - - private static void setRow(OrcStruct event, - int operation, - long originalTransaction, - int bucket, - long rowId, - long currentTransaction, - String value) { - event.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(operation)); - event.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION, - new LongWritable(originalTransaction)); - event.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucket)); - event.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(rowId)); - event.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION, - new LongWritable(currentTransaction)); - OrcStruct row = new OrcStruct(1); - row.setFieldValue(0, new Text(value)); - event.setFieldValue(OrcRecordUpdater.ROW, row); - } - - private static String value(OrcStruct event) { - return OrcRecordUpdater.getRow(event).getFieldValue(0).toString(); - } - - private List createStripes(long... 
rowCounts) { - long offset = 0; - List result = - new ArrayList(rowCounts.length); - for(long count: rowCounts) { - OrcProto.StripeInformation.Builder stripe = - OrcProto.StripeInformation.newBuilder(); - stripe.setDataLength(800).setIndexLength(100).setFooterLength(100) - .setNumberOfRows(count).setOffset(offset); - offset += 1000; - result.add(new ReaderImpl.StripeInformationImpl(stripe.build())); - } - return result; - } - - // can add .verboseLogging() to cause Mockito to log invocations - private final MockSettings settings = Mockito.withSettings(); - private final Path tmpDir = new Path(System.getProperty("test.tmp.dir", - "target" + File.separator + "test" + File.separator + "tmp")); - - private Reader createMockReader() throws IOException { - Reader reader = Mockito.mock(Reader.class, settings); - RecordReader recordReader = Mockito.mock(RecordReader.class, settings); - OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first"); - OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second"); - OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third"); - OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth"); - OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth"); - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))) - .thenReturn(recordReader); - - Mockito.when(recordReader.hasNext()). - thenReturn(true, true, true, true, true, false); - - Mockito.when(recordReader.getProgress()).thenReturn(1.0f); - - Mockito.when(recordReader.next(null)).thenReturn(row1); - Mockito.when(recordReader.next(row1)).thenReturn(row2); - Mockito.when(recordReader.next(row2)).thenReturn(row3); - Mockito.when(recordReader.next(row3)).thenReturn(row4); - Mockito.when(recordReader.next(row4)).thenReturn(row5); - - return reader; - } - - @Test - public void testReaderPair() throws Exception { - ReaderKey key = new ReaderKey(); - Reader reader = createMockReader(); - RecordIdentifier minKey = new RecordIdentifier(10, 20, 30); - RecordIdentifier maxKey = new RecordIdentifier(40, 50, 60); - ReaderPair pair = new ReaderPair(key, reader, 20, minKey, maxKey, - new Reader.Options(), 0); - RecordReader recordReader = pair.recordReader; - assertEquals(10, key.getTransactionId()); - assertEquals(20, key.getBucketId()); - assertEquals(40, key.getRowId()); - assertEquals(120, key.getCurrentTransactionId()); - assertEquals("third", value(pair.nextRecord)); - - pair.next(pair.nextRecord); - assertEquals(40, key.getTransactionId()); - assertEquals(50, key.getBucketId()); - assertEquals(60, key.getRowId()); - assertEquals(130, key.getCurrentTransactionId()); - assertEquals("fourth", value(pair.nextRecord)); - - pair.next(pair.nextRecord); - assertEquals(null, pair.nextRecord); - Mockito.verify(recordReader).close(); - } - - @Test - public void testReaderPairNoMin() throws Exception { - ReaderKey key = new ReaderKey(); - Reader reader = createMockReader(); - - ReaderPair pair = new ReaderPair(key, reader, 20, null, null, - new Reader.Options(), 0); - RecordReader recordReader = pair.recordReader; - assertEquals(10, key.getTransactionId()); - assertEquals(20, key.getBucketId()); - assertEquals(20, key.getRowId()); - assertEquals(100, 
key.getCurrentTransactionId()); - assertEquals("first", value(pair.nextRecord)); - - pair.next(pair.nextRecord); - assertEquals(10, key.getTransactionId()); - assertEquals(20, key.getBucketId()); - assertEquals(30, key.getRowId()); - assertEquals(110, key.getCurrentTransactionId()); - assertEquals("second", value(pair.nextRecord)); - - pair.next(pair.nextRecord); - assertEquals(10, key.getTransactionId()); - assertEquals(20, key.getBucketId()); - assertEquals(40, key.getRowId()); - assertEquals(120, key.getCurrentTransactionId()); - assertEquals("third", value(pair.nextRecord)); - - pair.next(pair.nextRecord); - assertEquals(40, key.getTransactionId()); - assertEquals(50, key.getBucketId()); - assertEquals(60, key.getRowId()); - assertEquals(130, key.getCurrentTransactionId()); - assertEquals("fourth", value(pair.nextRecord)); - - pair.next(pair.nextRecord); - assertEquals(40, key.getTransactionId()); - assertEquals(50, key.getBucketId()); - assertEquals(61, key.getRowId()); - assertEquals(140, key.getCurrentTransactionId()); - assertEquals("fifth", value(pair.nextRecord)); - - pair.next(pair.nextRecord); - assertEquals(null, pair.nextRecord); - Mockito.verify(recordReader).close(); - } - - private static OrcStruct createOriginalRow(String value) { - OrcStruct result = new OrcStruct(1); - result.setFieldValue(0, new Text(value)); - return result; - } - - private Reader createMockOriginalReader() throws IOException { - Reader reader = Mockito.mock(Reader.class, settings); - RecordReader recordReader = Mockito.mock(RecordReader.class, settings); - OrcStruct row1 = createOriginalRow("first"); - OrcStruct row2 = createOriginalRow("second"); - OrcStruct row3 = createOriginalRow("third"); - OrcStruct row4 = createOriginalRow("fourth"); - OrcStruct row5 = createOriginalRow("fifth"); - - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))) - .thenReturn(recordReader); - Mockito.when(recordReader.hasNext()). 
- thenReturn(true, true, true, true, true, false); - Mockito.when(recordReader.getRowNumber()).thenReturn(0L, 1L, 2L, 3L, 4L); - Mockito.when(recordReader.next(null)).thenReturn(row1); - Mockito.when(recordReader.next(row1)).thenReturn(row2); - Mockito.when(recordReader.next(row2)).thenReturn(row3); - Mockito.when(recordReader.next(row3)).thenReturn(row4); - Mockito.when(recordReader.next(row4)).thenReturn(row5); - return reader; - } - - @Test - public void testOriginalReaderPair() throws Exception { - ReaderKey key = new ReaderKey(); - Reader reader = createMockOriginalReader(); - RecordIdentifier minKey = new RecordIdentifier(0, 10, 1); - RecordIdentifier maxKey = new RecordIdentifier(0, 10, 3); - boolean[] includes = new boolean[]{true, true}; - ReaderPair pair = new OriginalReaderPair(key, reader, 10, minKey, maxKey, - new Reader.Options().include(includes)); - RecordReader recordReader = pair.recordReader; - assertEquals(0, key.getTransactionId()); - assertEquals(10, key.getBucketId()); - assertEquals(2, key.getRowId()); - assertEquals(0, key.getCurrentTransactionId()); - assertEquals("third", value(pair.nextRecord)); - - pair.next(pair.nextRecord); - assertEquals(0, key.getTransactionId()); - assertEquals(10, key.getBucketId()); - assertEquals(3, key.getRowId()); - assertEquals(0, key.getCurrentTransactionId()); - assertEquals("fourth", value(pair.nextRecord)); - - pair.next(pair.nextRecord); - assertEquals(null, pair.nextRecord); - Mockito.verify(recordReader).close(); - } - - private static ValidTxnList createMaximalTxnList() { - return new ValidReadTxnList(Long.MAX_VALUE + ":"); - } - - @Test - public void testOriginalReaderPairNoMin() throws Exception { - ReaderKey key = new ReaderKey(); - Reader reader = createMockOriginalReader(); - ReaderPair pair = new OriginalReaderPair(key, reader, 10, null, null, - new Reader.Options()); - assertEquals("first", value(pair.nextRecord)); - assertEquals(0, key.getTransactionId()); - assertEquals(10, key.getBucketId()); - assertEquals(0, key.getRowId()); - assertEquals(0, key.getCurrentTransactionId()); - - pair.next(pair.nextRecord); - assertEquals("second", value(pair.nextRecord)); - assertEquals(0, key.getTransactionId()); - assertEquals(10, key.getBucketId()); - assertEquals(1, key.getRowId()); - assertEquals(0, key.getCurrentTransactionId()); - - pair.next(pair.nextRecord); - assertEquals("third", value(pair.nextRecord)); - assertEquals(0, key.getTransactionId()); - assertEquals(10, key.getBucketId()); - assertEquals(2, key.getRowId()); - assertEquals(0, key.getCurrentTransactionId()); - - pair.next(pair.nextRecord); - assertEquals("fourth", value(pair.nextRecord)); - assertEquals(0, key.getTransactionId()); - assertEquals(10, key.getBucketId()); - assertEquals(3, key.getRowId()); - assertEquals(0, key.getCurrentTransactionId()); - - pair.next(pair.nextRecord); - assertEquals("fifth", value(pair.nextRecord)); - assertEquals(0, key.getTransactionId()); - assertEquals(10, key.getBucketId()); - assertEquals(4, key.getRowId()); - assertEquals(0, key.getCurrentTransactionId()); - - pair.next(pair.nextRecord); - assertEquals(null, pair.nextRecord); - Mockito.verify(pair.recordReader).close(); - } - - @Test - public void testNewBase() throws Exception { - Configuration conf = new Configuration(); - conf.set("columns", "col1"); - conf.set("columns.types", "string"); - Reader reader = Mockito.mock(Reader.class, settings); - RecordReader recordReader = Mockito.mock(RecordReader.class, settings); - - List types = new ArrayList(); - 
OrcProto.Type.Builder typeBuilder = OrcProto.Type.newBuilder(); - typeBuilder.setKind(OrcProto.Type.Kind.STRUCT).addSubtypes(1) - .addSubtypes(2).addSubtypes(3).addSubtypes(4).addSubtypes(5) - .addSubtypes(6); - types.add(typeBuilder.build()); - types.add(null); - types.add(null); - types.add(null); - types.add(null); - types.add(null); - typeBuilder.clearSubtypes(); - typeBuilder.addSubtypes(7); - types.add(typeBuilder.build()); - - Mockito.when(reader.getTypes()).thenReturn(types); - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))) - .thenReturn(recordReader); - - OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first"); - OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second"); - OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third"); - OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth"); - OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS); - setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth"); - - Mockito.when(recordReader.hasNext()). - thenReturn(true, true, true, true, true, false); - - Mockito.when(recordReader.getProgress()).thenReturn(1.0f); - - Mockito.when(recordReader.next(null)).thenReturn(row1, row4); - Mockito.when(recordReader.next(row1)).thenReturn(row2); - Mockito.when(recordReader.next(row2)).thenReturn(row3); - Mockito.when(recordReader.next(row3)).thenReturn(row5); - - Mockito.when(reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME)) - .thenReturn(ByteBuffer.wrap("10,20,30;40,50,60;40,50,61" - .getBytes("UTF-8"))); - Mockito.when(reader.getStripes()) - .thenReturn(createStripes(2, 2, 1)); - - OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, false, reader, - false, 10, createMaximalTxnList(), - new Reader.Options().range(1000, 1000), null); - RecordReader rr = merger.getCurrentReader().recordReader; - assertEquals(0, merger.getOtherReaders().size()); - - assertEquals(new RecordIdentifier(10, 20, 30), merger.getMinKey()); - assertEquals(new RecordIdentifier(40, 50, 60), merger.getMaxKey()); - RecordIdentifier id = merger.createKey(); - OrcStruct event = merger.createValue(); - - assertEquals(true, merger.next(id, event)); - assertEquals(10, id.getTransactionId()); - assertEquals(20, id.getBucketId()); - assertEquals(40, id.getRowId()); - assertEquals("third", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(40, id.getTransactionId()); - assertEquals(50, id.getBucketId()); - assertEquals(60, id.getRowId()); - assertEquals("fourth", getValue(event)); - - assertEquals(false, merger.next(id, event)); - assertEquals(1.0, merger.getProgress(), 0.01); - merger.close(); - Mockito.verify(rr).close(); - Mockito.verify(rr).getProgress(); - - StructObjectInspector eventObjectInspector = - (StructObjectInspector) merger.getObjectInspector(); - List fields = - eventObjectInspector.getAllStructFieldRefs(); - assertEquals(OrcRecordUpdater.FIELDS, fields.size()); - assertEquals("operation", - fields.get(OrcRecordUpdater.OPERATION).getFieldName()); - assertEquals("currentTransaction", - fields.get(OrcRecordUpdater.CURRENT_TRANSACTION).getFieldName()); - assertEquals("originalTransaction", - fields.get(OrcRecordUpdater.ORIGINAL_TRANSACTION).getFieldName()); - assertEquals("bucket", - 
fields.get(OrcRecordUpdater.BUCKET).getFieldName()); - assertEquals("rowId", - fields.get(OrcRecordUpdater.ROW_ID).getFieldName()); - StructObjectInspector rowObjectInspector = - (StructObjectInspector) fields.get(OrcRecordUpdater.ROW) - .getFieldObjectInspector(); - assertEquals("col1", - rowObjectInspector.getAllStructFieldRefs().get(0).getFieldName()); - } - - static class MyRow { - Text col1; - RecordIdentifier ROW__ID; - - MyRow(String val) { - col1 = new Text(val); - } - - MyRow(String val, long rowId, long origTxn, int bucket) { - col1 = new Text(val); - ROW__ID = new RecordIdentifier(origTxn, bucket, rowId); - } - } - - static String getValue(OrcStruct event) { - return OrcRecordUpdater.getRow(event).getFieldValue(0).toString(); - } - - @Test - public void testEmpty() throws Exception { - final int BUCKET = 0; - Configuration conf = new Configuration(); - OrcOutputFormat of = new OrcOutputFormat(); - FileSystem fs = FileSystem.getLocal(conf); - Path root = new Path(tmpDir, "testEmpty").makeQualified(fs); - fs.delete(root, true); - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - // write the empty base - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf) - .inspector(inspector).bucket(BUCKET).writingBase(true) - .maximumTransactionId(100).finalDestination(root); - of.getRecordUpdater(root, options).close(false); - - ValidTxnList txnList = new ValidReadTxnList("200:"); - AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, txnList); - - Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), - BUCKET); - Reader baseReader = OrcFile.createReader(basePath, - OrcFile.readerOptions(conf)); - OrcRawRecordMerger merger = - new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, - createMaximalTxnList(), new Reader.Options(), - AcidUtils.getPaths(directory.getCurrentDirectories())); - RecordIdentifier key = merger.createKey(); - OrcStruct value = merger.createValue(); - assertEquals(false, merger.next(key, value)); - } - - /** - * Test the OrcRecordUpdater with the OrcRawRecordMerger when there is - * a base and a delta. 
- * @throws Exception - */ - @Test - public void testNewBaseAndDelta() throws Exception { - testNewBaseAndDelta(false); - testNewBaseAndDelta(true); - } - private void testNewBaseAndDelta(boolean use130Format) throws Exception { - final int BUCKET = 10; - String[] values = new String[]{"first", "second", "third", "fourth", - "fifth", "sixth", "seventh", "eighth", - "ninth", "tenth"}; - Configuration conf = new Configuration(); - OrcOutputFormat of = new OrcOutputFormat(); - FileSystem fs = FileSystem.getLocal(conf); - Path root = new Path(tmpDir, "testNewBaseAndDelta").makeQualified(fs); - fs.delete(root, true); - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - // write the base - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf) - .inspector(inspector).bucket(BUCKET).finalDestination(root); - if(!use130Format) { - options.statementId(-1); - } - RecordUpdater ru = of.getRecordUpdater(root, - options.writingBase(true).maximumTransactionId(100)); - for(String v: values) { - ru.insert(0, new MyRow(v)); - } - ru.close(false); - - // write a delta - ru = of.getRecordUpdater(root, options.writingBase(false) - .minimumTransactionId(200).maximumTransactionId(200).recordIdColumn(1)); - ru.update(200, new MyRow("update 1", 0, 0, BUCKET)); - ru.update(200, new MyRow("update 2", 2, 0, BUCKET)); - ru.update(200, new MyRow("update 3", 3, 0, BUCKET)); - ru.delete(200, new MyRow("", 7, 0, BUCKET)); - ru.delete(200, new MyRow("", 8, 0, BUCKET)); - ru.close(false); - - ValidTxnList txnList = new ValidReadTxnList("200:"); - AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, txnList); - - assertEquals(new Path(root, "base_0000100"), directory.getBaseDirectory()); - assertEquals(new Path(root, use130Format ? 
- AcidUtils.deltaSubdir(200,200,0) : AcidUtils.deltaSubdir(200,200)), - directory.getCurrentDirectories().get(0).getPath()); - - Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), - BUCKET); - Reader baseReader = OrcFile.createReader(basePath, - OrcFile.readerOptions(conf)); - OrcRawRecordMerger merger = - new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, - createMaximalTxnList(), new Reader.Options(), - AcidUtils.getPaths(directory.getCurrentDirectories())); - assertEquals(null, merger.getMinKey()); - assertEquals(null, merger.getMaxKey()); - RecordIdentifier id = merger.createKey(); - OrcStruct event = merger.createValue(); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.UPDATE_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 0, 200), id); - assertEquals("update 1", getValue(event)); - assertFalse(merger.isDelete(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 1, 0), id); - assertEquals("second", getValue(event)); - assertFalse(merger.isDelete(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.UPDATE_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 2, 200), id); - assertEquals("update 2", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.UPDATE_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 3, 200), id); - assertEquals("update 3", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 4, 0), id); - assertEquals("fifth", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 5, 0), id); - assertEquals("sixth", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 6, 0), id); - assertEquals("seventh", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.DELETE_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 7, 200), id); - assertNull(OrcRecordUpdater.getRow(event)); - assertTrue(merger.isDelete(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.DELETE_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 8, 200), id); - assertNull(OrcRecordUpdater.getRow(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 9, 0), id); - assertEquals("tenth", getValue(event)); - - assertEquals(false, merger.next(id, event)); - merger.close(); - - // make a merger that doesn't collapse events - merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, - createMaximalTxnList(), new Reader.Options(), - AcidUtils.getPaths(directory.getCurrentDirectories())); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.UPDATE_OPERATION, - 
OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 0, 200), id); - assertEquals("update 1", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 0, 0), id); - assertEquals("first", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 1, 0), id); - assertEquals("second", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.UPDATE_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 2, 200), id); - assertEquals("update 2", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 2, 0), id); - assertEquals("third", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.UPDATE_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 3, 200), id); - assertEquals("update 3", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 3, 0), id); - assertEquals("fourth", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 4, 0), id); - assertEquals("fifth", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 5, 0), id); - assertEquals("sixth", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 6, 0), id); - assertEquals("seventh", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.DELETE_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 7, 200), id); - assertNull(OrcRecordUpdater.getRow(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 7, 0), id); - assertEquals("eighth", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.DELETE_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 8, 200), id); - assertNull(OrcRecordUpdater.getRow(event)); - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 8, 0), id); - assertEquals("ninth", getValue(event)); - - assertEquals(true, merger.next(id, event)); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, 9, 0), id); - assertEquals("tenth", getValue(event)); - - assertEquals(false, merger.next(id, event)); - merger.close(); - - // try ignoring the 200 transaction and make sure it works 
still - ValidTxnList txns = new ValidReadTxnList("2000:200"); - merger = - new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, - txns, new Reader.Options(), - AcidUtils.getPaths(directory.getCurrentDirectories())); - for(int i=0; i < values.length; ++i) { - assertEquals(true, merger.next(id, event)); - LOG.info("id = " + id + "event = " + event); - assertEquals(OrcRecordUpdater.INSERT_OPERATION, - OrcRecordUpdater.getOperation(event)); - assertEquals(new ReaderKey(0, BUCKET, i, 0), id); - assertEquals(values[i], getValue(event)); - } - - assertEquals(false, merger.next(id, event)); - merger.close(); - } - - static class BigRow { - int myint; - long mylong; - Text mytext; - float myfloat; - double mydouble; - RecordIdentifier ROW__ID; - - BigRow(int myint, long mylong, String mytext, float myfloat, double mydouble) { - this.myint = myint; - this.mylong = mylong; - this.mytext = new Text(mytext); - this.myfloat = myfloat; - this.mydouble = mydouble; - ROW__ID = null; - } - - BigRow(int myint, long mylong, String mytext, float myfloat, double mydouble, - long rowId, long origTxn, int bucket) { - this.myint = myint; - this.mylong = mylong; - this.mytext = new Text(mytext); - this.myfloat = myfloat; - this.mydouble = mydouble; - ROW__ID = new RecordIdentifier(origTxn, bucket, rowId); - } - - BigRow(long rowId, long origTxn, int bucket) { - ROW__ID = new RecordIdentifier(origTxn, bucket, rowId); - } - } - - /** - * Test the OrcRecordUpdater with the OrcRawRecordMerger when there is - * a base and a delta. - * @throws Exception - */ - @Test - public void testRecordReaderOldBaseAndDelta() throws Exception { - final int BUCKET = 10; - Configuration conf = new Configuration(); - OrcOutputFormat of = new OrcOutputFormat(); - FileSystem fs = FileSystem.getLocal(conf); - Path root = new Path(tmpDir, "testOldBaseAndDelta").makeQualified(fs); - fs.delete(root, true); - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector - (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - // write the base - MemoryManager mgr = new MemoryManager(conf){ - int rowsAddedSinceCheck = 0; - - @Override - synchronized void addedRow(int rows) throws IOException { - rowsAddedSinceCheck += rows; - if (rowsAddedSinceCheck >= 2) { - notifyWriters(); - rowsAddedSinceCheck = 0; - } - } - }; - // make 5 stripes with 2 rows each - Writer writer = OrcFile.createWriter(new Path(root, "0000010_0"), - OrcFile.writerOptions(conf).inspector(inspector).fileSystem(fs) - .blockPadding(false).bufferSize(10000).compress(CompressionKind.NONE) - .stripeSize(1).memory(mgr).version(OrcFile.Version.V_0_11)); - String[] values= new String[]{"ignore.1", "0.1", "ignore.2", "ignore.3", - "2.0", "2.1", "3.0", "ignore.4", "ignore.5", "ignore.6"}; - for(int i=0; i < values.length; ++i) { - writer.addRow(new BigRow(i, i, values[i], i, i)); - } - writer.close(); - - // write a delta - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf) - .writingBase(false).minimumTransactionId(1).maximumTransactionId(1) - .bucket(BUCKET).inspector(inspector).filesystem(fs).recordIdColumn(5).finalDestination(root); - RecordUpdater ru = of.getRecordUpdater(root, options); - values = new String[]{"0.0", null, null, "1.1", null, null, null, - "ignore.7"}; - for(int i=0; i < values.length; ++i) { - if (values[i] != null) { - ru.update(1, new BigRow(i, i, values[i], i, i, i, 0, BUCKET)); - } - } - ru.delete(100, new BigRow(9, 0, BUCKET)); - 
ru.close(false); - - // write a delta - options = options.minimumTransactionId(2).maximumTransactionId(2); - ru = of.getRecordUpdater(root, options); - values = new String[]{null, null, "1.0", null, null, null, null, "3.1"}; - for(int i=0; i < values.length; ++i) { - if (values[i] != null) { - ru.update(2, new BigRow(i, i, values[i], i, i, i, 0, BUCKET)); - } - } - ru.delete(100, new BigRow(8, 0, BUCKET)); - ru.close(false); - - InputFormat inf = new OrcInputFormat(); - JobConf job = new JobConf(); - job.set("mapred.min.split.size", "1"); - job.set("mapred.max.split.size", "2"); - job.set("mapred.input.dir", root.toString()); - InputSplit[] splits = inf.getSplits(job, 5); - assertEquals(5, splits.length); - org.apache.hadoop.mapred.RecordReader rr; - - // loop through the 5 splits and read each - for(int i=0; i < 4; ++i) { - System.out.println("starting split " + i); - rr = inf.getRecordReader(splits[i], job, Reporter.NULL); - NullWritable key = rr.createKey(); - OrcStruct value = rr.createValue(); - - // there should be exactly two rows per a split - for(int j=0; j < 2; ++j) { - System.out.println("i = " + i + ", j = " + j); - assertEquals(true, rr.next(key, value)); - System.out.println("record = " + value); - assertEquals(i + "." + j, value.getFieldValue(2).toString()); - } - assertEquals(false, rr.next(key, value)); - } - rr = inf.getRecordReader(splits[4], job, Reporter.NULL); - assertEquals(false, rr.next(rr.createKey(), rr.createValue())); - } - - /** - * Test the RecordReader when there is a new base and a delta. - * @throws Exception - */ - @Test - public void testRecordReaderNewBaseAndDelta() throws Exception { - final int BUCKET = 11; - Configuration conf = new Configuration(); - OrcOutputFormat of = new OrcOutputFormat(); - FileSystem fs = FileSystem.getLocal(conf); - Path root = new Path(tmpDir, "testRecordReaderNewBaseAndDelta").makeQualified(fs); - fs.delete(root, true); - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector - (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - // write the base - MemoryManager mgr = new MemoryManager(conf){ - int rowsAddedSinceCheck = 0; - - @Override - synchronized void addedRow(int rows) throws IOException { - rowsAddedSinceCheck += rows; - if (rowsAddedSinceCheck >= 2) { - notifyWriters(); - rowsAddedSinceCheck = 0; - } - } - }; - - // make 5 stripes with 2 rows each - OrcRecordUpdater.OrcOptions options = (OrcRecordUpdater.OrcOptions) - new OrcRecordUpdater.OrcOptions(conf) - .writingBase(true).minimumTransactionId(0).maximumTransactionId(0) - .bucket(BUCKET).inspector(inspector).filesystem(fs); - options.orcOptions(OrcFile.writerOptions(conf) - .stripeSize(1).blockPadding(false).compress(CompressionKind.NONE) - .memory(mgr)); - options.finalDestination(root); - RecordUpdater ru = of.getRecordUpdater(root, options); - String[] values= new String[]{"ignore.1", "0.1", "ignore.2", "ignore.3", - "2.0", "2.1", "3.0", "ignore.4", "ignore.5", "ignore.6"}; - for(int i=0; i < values.length; ++i) { - ru.insert(0, new BigRow(i, i, values[i], i, i)); - } - ru.close(false); - - // write a delta - options.writingBase(false).minimumTransactionId(1).maximumTransactionId(1).recordIdColumn(5); - ru = of.getRecordUpdater(root, options); - values = new String[]{"0.0", null, null, "1.1", null, null, null, - "ignore.7"}; - for(int i=0; i < values.length; ++i) { - if (values[i] != null) { - ru.update(1, new BigRow(i, i, values[i], i, i, i, 0, BUCKET)); - } - } 
- ru.delete(100, new BigRow(9, 0, BUCKET)); - ru.close(false); - - // write a delta - options.minimumTransactionId(2).maximumTransactionId(2); - ru = of.getRecordUpdater(root, options); - values = new String[]{null, null, "1.0", null, null, null, null, "3.1"}; - for(int i=0; i < values.length; ++i) { - if (values[i] != null) { - ru.update(2, new BigRow(i, i, values[i], i, i, i, 0, BUCKET)); - } - } - ru.delete(100, new BigRow(8, 0, BUCKET)); - ru.close(false); - - InputFormat inf = new OrcInputFormat(); - JobConf job = new JobConf(); - job.set("mapred.min.split.size", "1"); - job.set("mapred.max.split.size", "2"); - job.set("mapred.input.dir", root.toString()); - InputSplit[] splits = inf.getSplits(job, 5); - assertEquals(5, splits.length); - org.apache.hadoop.mapred.RecordReader rr; - - // loop through the 5 splits and read each - for(int i=0; i < 4; ++i) { - System.out.println("starting split " + i); - rr = inf.getRecordReader(splits[i], job, Reporter.NULL); - NullWritable key = rr.createKey(); - OrcStruct value = rr.createValue(); - - // there should be exactly two rows per a split - for(int j=0; j < 2; ++j) { - System.out.println("i = " + i + ", j = " + j); - assertEquals(true, rr.next(key, value)); - System.out.println("record = " + value); - assertEquals(i + "." + j, value.getFieldValue(2).toString()); - } - assertEquals(false, rr.next(key, value)); - } - rr = inf.getRecordReader(splits[4], job, Reporter.NULL); - assertEquals(false, rr.next(rr.createKey(), rr.createValue())); - } - - /** - * Test the RecordReader when there is a new base and a delta. - * @throws Exception - */ - @Test - public void testRecordReaderDelta() throws Exception { - final int BUCKET = 0; - Configuration conf = new Configuration(); - OrcOutputFormat of = new OrcOutputFormat(); - FileSystem fs = FileSystem.getLocal(conf); - Path root = new Path(tmpDir, "testRecordReaderDelta").makeQualified(fs); - fs.delete(root, true); - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - // write a delta - AcidOutputFormat.Options options = - new AcidOutputFormat.Options(conf) - .bucket(BUCKET).inspector(inspector).filesystem(fs) - .writingBase(false).minimumTransactionId(1).maximumTransactionId(1) - .finalDestination(root); - RecordUpdater ru = of.getRecordUpdater(root, options); - String[] values = new String[]{"a", "b", "c", "d", "e"}; - for(int i=0; i < values.length; ++i) { - ru.insert(1, new MyRow(values[i])); - } - ru.close(false); - - // write a delta - options.minimumTransactionId(2).maximumTransactionId(2); - ru = of.getRecordUpdater(root, options); - values = new String[]{"f", "g", "h", "i", "j"}; - for(int i=0; i < values.length; ++i) { - ru.insert(2, new MyRow(values[i])); - } - ru.close(false); - - InputFormat inf = new OrcInputFormat(); - JobConf job = new JobConf(); - job.set("mapred.min.split.size", "1"); - job.set("mapred.max.split.size", "2"); - job.set("mapred.input.dir", root.toString()); - job.set("bucket_count", "1"); - InputSplit[] splits = inf.getSplits(job, 5); - assertEquals(1, splits.length); - org.apache.hadoop.mapred.RecordReader rr; - rr = inf.getRecordReader(splits[0], job, Reporter.NULL); - values = new String[]{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}; - OrcStruct row = rr.createValue(); - for(int i = 0; i < values.length; ++i) { - System.out.println("Checking " + i); - assertEquals(true, rr.next(NullWritable.get(), row)); - 
assertEquals(values[i], row.getFieldValue(0).toString()); - } - assertEquals(false, rr.next(NullWritable.get(), row)); - } - - /** - * Test the RecordReader when the delta has been flushed, but not closed. - * @throws Exception - */ - @Test - public void testRecordReaderIncompleteDelta() throws Exception { - testRecordReaderIncompleteDelta(false); - testRecordReaderIncompleteDelta(true); - } - /** - * - * @param use130Format true means use delta_0001_0001_0000 format, else delta_0001_00001 - */ - private void testRecordReaderIncompleteDelta(boolean use130Format) throws Exception { - final int BUCKET = 1; - Configuration conf = new Configuration(); - OrcOutputFormat of = new OrcOutputFormat(); - FileSystem fs = FileSystem.getLocal(conf).getRaw(); - Path root = new Path(tmpDir, "testRecordReaderIncompleteDelta").makeQualified(fs); - fs.delete(root, true); - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - // write a base - AcidOutputFormat.Options options = - new AcidOutputFormat.Options(conf) - .writingBase(true).minimumTransactionId(0).maximumTransactionId(0) - .bucket(BUCKET).inspector(inspector).filesystem(fs).finalDestination(root); - if(!use130Format) { - options.statementId(-1); - } - RecordUpdater ru = of.getRecordUpdater(root, options); - String[] values= new String[]{"1", "2", "3", "4", "5"}; - for(int i=0; i < values.length; ++i) { - ru.insert(0, new MyRow(values[i])); - } - ru.close(false); - - // write a delta - options.writingBase(false).minimumTransactionId(10) - .maximumTransactionId(19); - ru = of.getRecordUpdater(root, options); - values = new String[]{"6", "7", "8"}; - for(int i=0; i < values.length; ++i) { - ru.insert(1, new MyRow(values[i])); - } - InputFormat inf = new OrcInputFormat(); - JobConf job = new JobConf(); - job.set("mapred.input.dir", root.toString()); - job.set("bucket_count", "2"); - - // read the keys before the delta is flushed - InputSplit[] splits = inf.getSplits(job, 1); - assertEquals(2, splits.length); - org.apache.hadoop.mapred.RecordReader rr = - inf.getRecordReader(splits[0], job, Reporter.NULL); - NullWritable key = rr.createKey(); - OrcStruct value = rr.createValue(); - System.out.println("Looking at split " + splits[0]); - for(int i=1; i < 6; ++i) { - System.out.println("Checking row " + i); - assertEquals(true, rr.next(key, value)); - assertEquals(Integer.toString(i), value.getFieldValue(0).toString()); - } - assertEquals(false, rr.next(key, value)); - - ru.flush(); - ru.flush(); - values = new String[]{"9", "10"}; - for(int i=0; i < values.length; ++i) { - ru.insert(3, new MyRow(values[i])); - } - ru.flush(); - - splits = inf.getSplits(job, 1); - assertEquals(2, splits.length); - rr = inf.getRecordReader(splits[0], job, Reporter.NULL); - Path sideFile = new Path(root + "/" + (use130Format ? 
AcidUtils.deltaSubdir(10,19,0) : - AcidUtils.deltaSubdir(10,19)) + "/bucket_00001_flush_length"); - assertEquals(true, fs.exists(sideFile)); - assertEquals(24, fs.getFileStatus(sideFile).getLen()); - - for(int i=1; i < 11; ++i) { - assertEquals(true, rr.next(key, value)); - assertEquals(Integer.toString(i), value.getFieldValue(0).toString()); - } - assertEquals(false, rr.next(key, value)); - } - -} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java new file mode 100644 index 0000000..1a3559e --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java @@ -0,0 +1,297 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.orc; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.PrintStream; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; + +public class TestRLEv2 { + Path workDir = new Path(System.getProperty("test.tmp.dir", + "target" + File.separator + "test" + File.separator + "tmp")); + Path testFilePath; + Configuration conf; + FileSystem fs; + + @Rule + public TestName testCaseName = new TestName(); + + @Before + public void openFileSystem () throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + testFilePath = new Path(workDir, "TestRLEv2." + + testCaseName.getMethodName() + ".orc"); + fs.delete(testFilePath, false); + } + + @Test + public void testFixedDeltaZero() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow(123); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. 
Each run has 2 bytes header, 2 bytes base (base = 123, + // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaOne() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow(i % 512); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0) + // and 1 byte delta (delta = 1). In total, 4 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaOneDescending() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow(512 - (i % 512)); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint) + // and 1 byte delta (delta = 1). In total, 5 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaLarge() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow(i % 512 + ((i % 512 ) * 100)); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0) + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 5 bytes per run. 
+ assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaLargeDescending() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow((512 - i % 512) + ((i % 512 ) * 100)); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint) + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 6 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60")); + System.setOut(origOut); + } + + @Test + public void testShortRepeat() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5; ++i) { + w.addRow(10); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 1 byte header + 1 byte value + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2")); + System.setOut(origOut); + } + + @Test + public void testDeltaUnknownSign() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + w.addRow(0); + for (int i = 0; i < 511; ++i) { + w.addRow(i); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding + // will be used. 2 bytes for header and 640 bytes for data (512 values with fixed bit of 10 bits + // each, 5120/8 = 640). 
Total bytes 642 + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642")); + System.setOut(origOut); + } + + @Test + public void testPatchedBase() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + Random rand = new Random(123); + w.addRow(10000000); + for (int i = 0; i < 511; ++i) { + w.addRow(rand.nextInt(i+1)); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // use PATCHED_BASE encoding + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583")); + System.setOut(origOut); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java index 134f78c..070e2ab 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java @@ -1984,9 +1984,9 @@ public void testMemoryManagementV12() throws Exception { stripe.getDataLength() < 5000); } // with HIVE-7832, the dictionaries will be disabled after writing the first - // stripe as there are too many distinct values. Hence only 4 stripes as + // stripe as there are too many distinct values. 
Hence only 3 stripes as // compared to 25 stripes in version 0.11 (above test case) - assertEquals(4, i); + assertEquals(3, i); assertEquals(2500, reader.getNumberOfRows()); } diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out index bef44a5..d0b25da 100644 --- a/ql/src/test/resources/orc-file-has-null.out +++ b/ql/src/test/resources/orc-file-has-null.out @@ -29,35 +29,35 @@ File Statistics: Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000 Stripes: - Stripe: offset: 3 data: 241 rows: 5000 tail: 67 index: 163 + Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 64 - Stream: column 2 section ROW_INDEX start: 84 length 82 - Stream: column 1 section DATA start: 166 length 159 - Stream: column 1 section LENGTH start: 325 length 32 - Stream: column 2 section PRESENT start: 357 length 13 - Stream: column 2 section DATA start: 370 length 22 - Stream: column 2 section LENGTH start: 392 length 6 - Stream: column 2 section DICTIONARY_DATA start: 398 length 9 + Stream: column 1 section ROW_INDEX start: 20 length 60 + Stream: column 2 section ROW_INDEX start: 80 length 77 + Stream: column 1 section DATA start: 157 length 159 + Stream: column 1 section LENGTH start: 316 length 15 + Stream: column 2 section PRESENT start: 331 length 13 + Stream: column 2 section DATA start: 344 length 18 + Stream: column 2 section LENGTH start: 362 length 6 + Stream: column 2 section DICTIONARY_DATA start: 368 length 9 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[2] Row group indices for column 2: Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0 - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,66,488 - Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488 - Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,136,488 - Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,136,488 - Stripe: offset: 474 data: 202 rows: 5000 tail: 64 index: 120 - Stream: column 0 section ROW_INDEX start: 474 length 17 - Stream: column 1 section ROW_INDEX start: 491 length 64 - Stream: column 2 section ROW_INDEX start: 555 length 39 - Stream: column 1 section DATA start: 594 length 159 - Stream: column 1 section LENGTH start: 753 length 32 - Stream: column 2 section PRESENT start: 785 length 11 - Stream: column 2 section DATA start: 796 length 0 - Stream: column 2 section LENGTH start: 796 length 0 - Stream: column 2 section DICTIONARY_DATA start: 796 length 0 + Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,4,488 + Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,4,488 + Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,12,488 + Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,12,488 + Stripe: offset: 442 data: 185 rows: 5000 tail: 64 index: 116 + Stream: column 0 section ROW_INDEX start: 442 length 17 + Stream: column 1 section ROW_INDEX start: 459 length 60 + Stream: column 2 section ROW_INDEX start: 519 length 39 + Stream: column 1 section DATA start: 558 length 159 + Stream: column 1 section LENGTH start: 717 length 15 + Stream: column 2 section PRESENT start: 732 length 11 + Stream: column 2 section DATA start: 743 length 0 + Stream: column 2 section LENGTH start: 743 length 0 + Stream: column 2 section DICTIONARY_DATA start: 743 length 0 Encoding column 0: DIRECT 
Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[0] @@ -67,34 +67,34 @@ Stripes: Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0 Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 - Stripe: offset: 860 data: 232 rows: 5000 tail: 63 index: 149 - Stream: column 0 section ROW_INDEX start: 860 length 17 - Stream: column 1 section ROW_INDEX start: 877 length 64 - Stream: column 2 section ROW_INDEX start: 941 length 68 - Stream: column 1 section DATA start: 1009 length 159 - Stream: column 1 section LENGTH start: 1168 length 32 - Stream: column 2 section DATA start: 1200 length 24 - Stream: column 2 section LENGTH start: 1224 length 6 - Stream: column 2 section DICTIONARY_DATA start: 1230 length 11 + Stripe: offset: 807 data: 206 rows: 5000 tail: 60 index: 137 + Stream: column 0 section ROW_INDEX start: 807 length 17 + Stream: column 1 section ROW_INDEX start: 824 length 60 + Stream: column 2 section ROW_INDEX start: 884 length 60 + Stream: column 1 section DATA start: 944 length 159 + Stream: column 1 section LENGTH start: 1103 length 15 + Stream: column 2 section DATA start: 1118 length 15 + Stream: column 2 section LENGTH start: 1133 length 6 + Stream: column 2 section DICTIONARY_DATA start: 1139 length 11 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[1] Row group indices for column 2: Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,66,488 - Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464 - Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440 - Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416 - Stripe: offset: 1304 data: 202 rows: 5000 tail: 64 index: 120 - Stream: column 0 section ROW_INDEX start: 1304 length 17 - Stream: column 1 section ROW_INDEX start: 1321 length 64 - Stream: column 2 section ROW_INDEX start: 1385 length 39 - Stream: column 1 section DATA start: 1424 length 159 - Stream: column 1 section LENGTH start: 1583 length 32 - Stream: column 2 section PRESENT start: 1615 length 11 - Stream: column 2 section DATA start: 1626 length 0 - Stream: column 2 section LENGTH start: 1626 length 0 - Stream: column 2 section DICTIONARY_DATA start: 1626 length 0 + Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,4,488 + Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,12,464 + Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,20,440 + Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,28,416 + Stripe: offset: 1210 data: 185 rows: 5000 tail: 64 index: 116 + Stream: column 0 section ROW_INDEX start: 1210 length 17 + Stream: column 1 section ROW_INDEX start: 1227 length 60 + Stream: column 2 section ROW_INDEX start: 1287 length 39 + Stream: column 1 section DATA start: 1326 length 159 + Stream: column 1 section LENGTH start: 1485 length 15 + Stream: column 2 section PRESENT start: 1500 length 11 + Stream: column 2 section DATA start: 1511 length 0 + Stream: column 2 section LENGTH start: 1511 length 0 + Stream: column 2 section DICTIONARY_DATA start: 1511 length 0 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[0] @@ 
-105,6 +105,6 @@ Stripes: Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 -File length: 1940 bytes +File length: 1823 bytes Padding length: 0 bytes Padding ratio: 0% diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out index 50d5701..c4a7d22 100644 --- a/ql/src/test/results/clientpositive/orc_file_dump.q.out +++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out @@ -129,7 +129,7 @@ File Statistics: Column 11: count: 1049 hasNull: false sum: 13278 Stripes: - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944 + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943 Stream: column 0 section ROW_INDEX start: 3 length 20 Stream: column 0 section BLOOM_FILTER start: 23 length 45 Stream: column 1 section ROW_INDEX start: 68 length 58 @@ -148,30 +148,30 @@ Stripes: Stream: column 7 section BLOOM_FILTER start: 6812 length 45 Stream: column 8 section ROW_INDEX start: 6857 length 86 Stream: column 8 section BLOOM_FILTER start: 6943 length 1157 - Stream: column 9 section ROW_INDEX start: 8100 length 51 - Stream: column 9 section BLOOM_FILTER start: 8151 length 62 - Stream: column 10 section ROW_INDEX start: 8213 length 82 - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297 - Stream: column 11 section ROW_INDEX start: 9592 length 47 - Stream: column 11 section BLOOM_FILTER start: 9639 length 308 - Stream: column 1 section PRESENT start: 9947 length 17 - Stream: column 1 section DATA start: 9964 length 962 - Stream: column 2 section PRESENT start: 10926 length 17 - Stream: column 2 section DATA start: 10943 length 1441 - Stream: column 3 section DATA start: 12384 length 1704 - Stream: column 4 section DATA start: 14088 length 1998 - Stream: column 5 section DATA start: 16086 length 2925 - Stream: column 6 section DATA start: 19011 length 3323 - Stream: column 7 section DATA start: 22334 length 137 - Stream: column 8 section DATA start: 22471 length 1572 - Stream: column 8 section LENGTH start: 24043 length 310 - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548 - Stream: column 9 section DATA start: 25901 length 62 - Stream: column 9 section SECONDARY start: 25963 length 1783 - Stream: column 10 section DATA start: 27746 length 2138 - Stream: column 10 section SECONDARY start: 29884 length 231 - Stream: column 11 section DATA start: 30115 length 1877 - Stream: column 11 section LENGTH start: 31992 length 591 + Stream: column 9 section ROW_INDEX start: 8100 length 50 + Stream: column 9 section BLOOM_FILTER start: 8150 length 62 + Stream: column 10 section ROW_INDEX start: 8212 length 82 + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297 + Stream: column 11 section ROW_INDEX start: 9591 length 47 + Stream: column 11 section BLOOM_FILTER start: 9638 length 308 + Stream: column 1 section PRESENT start: 9946 length 17 + Stream: column 1 section DATA start: 9963 length 962 + Stream: column 2 section PRESENT start: 10925 length 17 + Stream: column 2 section DATA start: 10942 length 1441 + Stream: column 3 section DATA start: 12383 length 1704 + Stream: column 4 section DATA start: 14087 length 1998 + Stream: column 5 section DATA start: 16085 length 2925 + Stream: column 6 section DATA start: 19010 length 3323 + Stream: column 7 section DATA start: 22333 length 137 + Stream: column 8 section DATA start: 22470 length 1572 + Stream: column 8 section LENGTH start: 24042 length 310 + Stream: column 8 
section DICTIONARY_DATA start: 24352 length 1548 + Stream: column 9 section DATA start: 25900 length 19 + Stream: column 9 section SECONDARY start: 25919 length 1783 + Stream: column 10 section DATA start: 27702 length 2138 + Stream: column 10 section SECONDARY start: 29840 length 231 + Stream: column 11 section DATA start: 30071 length 1877 + Stream: column 11 section LENGTH start: 31948 length 591 Encoding column 0: DIRECT Encoding column 1: DIRECT Encoding column 2: DIRECT_V2 @@ -192,7 +192,7 @@ Stripes: Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5 -File length: 33458 bytes +File length: 33416 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -247,7 +247,7 @@ File Statistics: Column 11: count: 1049 hasNull: false sum: 13278 Stripes: - Stripe: offset: 3 data: 22636 rows: 1049 tail: 251 index: 15096 + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 15095 Stream: column 0 section ROW_INDEX start: 3 length 20 Stream: column 0 section BLOOM_FILTER start: 23 length 56 Stream: column 1 section ROW_INDEX start: 79 length 58 @@ -266,30 +266,30 @@ Stripes: Stream: column 7 section BLOOM_FILTER start: 10385 length 56 Stream: column 8 section ROW_INDEX start: 10441 length 86 Stream: column 8 section BLOOM_FILTER start: 10527 length 1829 - Stream: column 9 section ROW_INDEX start: 12356 length 51 - Stream: column 9 section BLOOM_FILTER start: 12407 length 95 - Stream: column 10 section ROW_INDEX start: 12502 length 82 - Stream: column 10 section BLOOM_FILTER start: 12584 length 1994 - Stream: column 11 section ROW_INDEX start: 14578 length 47 - Stream: column 11 section BLOOM_FILTER start: 14625 length 474 - Stream: column 1 section PRESENT start: 15099 length 17 - Stream: column 1 section DATA start: 15116 length 962 - Stream: column 2 section PRESENT start: 16078 length 17 - Stream: column 2 section DATA start: 16095 length 1441 - Stream: column 3 section DATA start: 17536 length 1704 - Stream: column 4 section DATA start: 19240 length 1998 - Stream: column 5 section DATA start: 21238 length 2925 - Stream: column 6 section DATA start: 24163 length 3323 - Stream: column 7 section DATA start: 27486 length 137 - Stream: column 8 section DATA start: 27623 length 1572 - Stream: column 8 section LENGTH start: 29195 length 310 - Stream: column 8 section DICTIONARY_DATA start: 29505 length 1548 - Stream: column 9 section DATA start: 31053 length 62 - Stream: column 9 section SECONDARY start: 31115 length 1783 - Stream: column 10 section DATA start: 32898 length 2138 - Stream: column 10 section SECONDARY start: 35036 length 231 - Stream: column 11 section DATA start: 35267 length 1877 - Stream: column 11 section LENGTH start: 37144 length 591 + Stream: column 9 section ROW_INDEX start: 12356 length 50 + Stream: column 9 section BLOOM_FILTER start: 12406 length 95 + Stream: column 10 section ROW_INDEX start: 12501 length 82 + Stream: column 10 section BLOOM_FILTER start: 12583 length 1994 + Stream: column 11 section ROW_INDEX start: 14577 length 47 + Stream: column 11 section BLOOM_FILTER start: 14624 length 474 + Stream: column 1 section PRESENT start: 15098 length 17 + Stream: column 1 section DATA start: 15115 length 962 + Stream: column 2 section PRESENT start: 16077 length 17 + Stream: column 2 section DATA start: 16094 length 1441 + Stream: column 3 section DATA start: 17535 length 1704 + Stream: 
column 4 section DATA start: 19239 length 1998 + Stream: column 5 section DATA start: 21237 length 2925 + Stream: column 6 section DATA start: 24162 length 3323 + Stream: column 7 section DATA start: 27485 length 137 + Stream: column 8 section DATA start: 27622 length 1572 + Stream: column 8 section LENGTH start: 29194 length 310 + Stream: column 8 section DICTIONARY_DATA start: 29504 length 1548 + Stream: column 9 section DATA start: 31052 length 19 + Stream: column 9 section SECONDARY start: 31071 length 1783 + Stream: column 10 section DATA start: 32854 length 2138 + Stream: column 10 section SECONDARY start: 34992 length 231 + Stream: column 11 section DATA start: 35223 length 1877 + Stream: column 11 section LENGTH start: 37100 length 591 Encoding column 0: DIRECT Encoding column 1: DIRECT Encoding column 2: DIRECT_V2 @@ -310,7 +310,7 @@ Stripes: Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 0.0297 expectedFpp: 2.0324289E-11 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8 -File length: 38613 bytes +File length: 38568 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -377,7 +377,7 @@ File Statistics: Column 11: count: 1049 hasNull: false sum: 13278 Stripes: - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944 + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943 Stream: column 0 section ROW_INDEX start: 3 length 20 Stream: column 0 section BLOOM_FILTER start: 23 length 45 Stream: column 1 section ROW_INDEX start: 68 length 58 @@ -396,30 +396,30 @@ Stripes: Stream: column 7 section BLOOM_FILTER start: 6812 length 45 Stream: column 8 section ROW_INDEX start: 6857 length 86 Stream: column 8 section BLOOM_FILTER start: 6943 length 1157 - Stream: column 9 section ROW_INDEX start: 8100 length 51 - Stream: column 9 section BLOOM_FILTER start: 8151 length 62 - Stream: column 10 section ROW_INDEX start: 8213 length 82 - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297 - Stream: column 11 section ROW_INDEX start: 9592 length 47 - Stream: column 11 section BLOOM_FILTER start: 9639 length 308 - Stream: column 1 section PRESENT start: 9947 length 17 - Stream: column 1 section DATA start: 9964 length 962 - Stream: column 2 section PRESENT start: 10926 length 17 - Stream: column 2 section DATA start: 10943 length 1441 - Stream: column 3 section DATA start: 12384 length 1704 - Stream: column 4 section DATA start: 14088 length 1998 - Stream: column 5 section DATA start: 16086 length 2925 - Stream: column 6 section DATA start: 19011 length 3323 - Stream: column 7 section DATA start: 22334 length 137 - Stream: column 8 section DATA start: 22471 length 1572 - Stream: column 8 section LENGTH start: 24043 length 310 - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548 - Stream: column 9 section DATA start: 25901 length 62 - Stream: column 9 section SECONDARY start: 25963 length 1783 - Stream: column 10 section DATA start: 27746 length 2138 - Stream: column 10 section SECONDARY start: 29884 length 231 - Stream: column 11 section DATA start: 30115 length 1877 - Stream: column 11 section LENGTH start: 31992 length 591 + Stream: column 9 section ROW_INDEX start: 8100 length 50 + Stream: column 9 section BLOOM_FILTER start: 8150 length 62 + Stream: column 10 section ROW_INDEX start: 8212 length 82 + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297 + Stream: column 11 section ROW_INDEX start: 9591 length 47 + Stream: column 11 section 
BLOOM_FILTER start: 9638 length 308 + Stream: column 1 section PRESENT start: 9946 length 17 + Stream: column 1 section DATA start: 9963 length 962 + Stream: column 2 section PRESENT start: 10925 length 17 + Stream: column 2 section DATA start: 10942 length 1441 + Stream: column 3 section DATA start: 12383 length 1704 + Stream: column 4 section DATA start: 14087 length 1998 + Stream: column 5 section DATA start: 16085 length 2925 + Stream: column 6 section DATA start: 19010 length 3323 + Stream: column 7 section DATA start: 22333 length 137 + Stream: column 8 section DATA start: 22470 length 1572 + Stream: column 8 section LENGTH start: 24042 length 310 + Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548 + Stream: column 9 section DATA start: 25900 length 19 + Stream: column 9 section SECONDARY start: 25919 length 1783 + Stream: column 10 section DATA start: 27702 length 2138 + Stream: column 10 section SECONDARY start: 29840 length 231 + Stream: column 11 section DATA start: 30071 length 1877 + Stream: column 11 section LENGTH start: 31948 length 591 Encoding column 0: DIRECT Encoding column 1: DIRECT Encoding column 2: DIRECT_V2 @@ -440,7 +440,7 @@ Stripes: Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5 -File length: 33458 bytes +File length: 33416 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- diff --git a/ql/src/test/results/clientpositive/orc_llap.q.out b/ql/src/test/results/clientpositive/orc_llap.q.out index c9bb3c8..742c7d8 100644 --- a/ql/src/test/results/clientpositive/orc_llap.q.out +++ b/ql/src/test/results/clientpositive/orc_llap.q.out @@ -698,17 +698,17 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: ((cint > 10) and cbigint is not null) (type: boolean) - Statistics: Num rows: 99583 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 98779 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((cint > 10) and cbigint is not null) (type: boolean) - Statistics: Num rows: 16597 Data size: 265553 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16463 Data size: 263408 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16597 Data size: 265553 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16463 Data size: 263408 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 16597 Data size: 265553 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16463 Data size: 263408 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -768,17 +768,17 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: ((cint > 10) and cbigint is not null) (type: boolean) - Statistics: Num rows: 4979 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4938 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((cint > 10) and cbigint is not null) (type: boolean) - Statistics: Num rows: 830 Data size: 265609 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 823 Data size: 263411 Basic stats: 
COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 830 Data size: 265609 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 823 Data size: 263411 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 830 Data size: 265609 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 823 Data size: 263411 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -838,17 +838,17 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: ((cint > 5) and (cint < 10)) (type: boolean) - Statistics: Num rows: 15320 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15196 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((cint > 5) and (cint < 10)) (type: boolean) - Statistics: Num rows: 1702 Data size: 177014 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1688 Data size: 175561 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cstring2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1702 Data size: 177014 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1688 Data size: 175561 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1702 Data size: 177014 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1688 Data size: 175561 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -907,22 +907,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap - Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cstring1 (type: string), cstring2 (type: string) outputColumnNames: cstring1, cstring2 - Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: cstring1 (type: string), cstring2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized LLAP IO: all inputs @@ -932,10 +932,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: 
mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3983 Data size: 796669 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3951 Data size: 790234 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3983 Data size: 796669 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3951 Data size: 790234 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -999,14 +999,14 @@ STAGE PLANS: TableScan alias: o1 filterExpr: (cbigint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 14226 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14111 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cbigint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cstring1 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: smallint) @@ -1018,14 +1018,14 @@ STAGE PLANS: TableScan alias: o1 filterExpr: (cbigint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 14226 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14111 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cbigint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cstring2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -1033,14 +1033,14 @@ STAGE PLANS: 0 _col0 (type: smallint) 1 _col0 (type: smallint) outputColumnNames: _col2, _col5 - Statistics: Num rows: 3912 Data size: 438229 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3880 Data size: 434659 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3912 Data size: 438229 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3880 Data size: 434659 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3912 Data size: 438229 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3880 Data size: 434659 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/orc_merge11.q.out b/ql/src/test/results/clientpositive/orc_merge11.q.out index da608db..f0769d4 100644 --- 
a/ql/src/test/results/clientpositive/orc_merge11.q.out +++ b/ql/src/test/results/clientpositive/orc_merge11.q.out @@ -96,22 +96,22 @@ File Statistics: Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -120,12 +120,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 11071 bytes +File length: 6849 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -155,22 +155,22 @@ File Statistics: Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section 
ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -179,12 +179,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 11071 bytes +File length: 6849 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -244,22 +244,22 @@ File Statistics: Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section 
DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -268,26 +268,26 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 - Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509 - Stream: column 0 section ROW_INDEX start: 10733 length 17 - Stream: column 1 section ROW_INDEX start: 10750 length 85 - Stream: column 2 section ROW_INDEX start: 10835 length 87 - Stream: column 3 section ROW_INDEX start: 10922 length 111 - Stream: column 4 section ROW_INDEX start: 11033 length 108 - Stream: column 5 section ROW_INDEX start: 11141 length 101 - Stream: column 1 section DATA start: 11242 length 871 - Stream: column 2 section DATA start: 12113 length 362 - Stream: column 2 section LENGTH start: 12475 length 8 - Stream: column 2 section DICTIONARY_DATA start: 12483 length 23 - Stream: column 3 section DATA start: 12506 length 5167 - Stream: column 4 section DATA start: 17673 length 524 - Stream: column 4 section SECONDARY start: 18197 length 118 - Stream: column 5 section DATA start: 18315 length 2913 - Stream: column 5 section SECONDARY start: 21228 length 118 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 + Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498 + Stream: column 0 section ROW_INDEX start: 6511 length 17 + Stream: column 1 section ROW_INDEX start: 6528 length 83 + Stream: column 2 section ROW_INDEX start: 6611 length 81 + Stream: column 3 section ROW_INDEX start: 6692 length 111 + Stream: column 4 section ROW_INDEX start: 6803 length 110 + Stream: column 5 section ROW_INDEX start: 6913 length 96 + Stream: column 1 section DATA start: 7009 length 45 + Stream: column 2 section DATA start: 7054 length 41 + Stream: column 2 section LENGTH start: 7095 length 8 + Stream: column 2 
section DICTIONARY_DATA start: 7103 length 23 + Stream: column 3 section DATA start: 7126 length 5167 + Stream: column 4 section DATA start: 12293 length 524 + Stream: column 4 section SECONDARY start: 12817 length 18 + Stream: column 5 section DATA start: 12835 length 53 + Stream: column 5 section SECONDARY start: 12888 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -296,12 +296,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 21814 bytes +File length: 13369 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out index 2b13dc6..1402a3f 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out @@ -813,11 +813,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cmodtinyint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -826,7 +826,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -934,11 +934,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cmodtinyint (type: int), cmodint (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -950,7 +950,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1058,11 +1058,11 @@ STAGE PLANS: Map Operator Tree: TableScan 
alias: s - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cmodtinyint (type: int), cmodint (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -1074,7 +1074,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1182,11 +1182,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cmodtinyint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -1198,7 +1198,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1300,11 +1300,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cmodtinyint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) @@ -1323,11 +1323,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cmodtinyint (type: int), cmodint (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -1337,7 +1337,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -1346,7 +1346,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 7329 Data size: 3379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7329 Data size: 3335 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out index 
da608db..f0769d4 100644 --- a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out +++ b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out @@ -96,22 +96,22 @@ File Statistics: Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -120,12 +120,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 11071 bytes +File length: 6849 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -155,22 +155,22 @@ File Statistics: Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 
85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -179,12 +179,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 11071 bytes +File length: 6849 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -244,22 +244,22 @@ File Statistics: Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 
- Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -268,26 +268,26 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 - Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509 - Stream: column 0 section ROW_INDEX start: 10733 length 17 - Stream: column 1 section ROW_INDEX start: 10750 length 85 - Stream: column 2 section ROW_INDEX start: 10835 length 87 - Stream: column 3 section ROW_INDEX start: 10922 length 111 - Stream: column 4 section ROW_INDEX start: 11033 length 108 - Stream: column 5 section ROW_INDEX start: 11141 length 101 - Stream: column 1 section DATA start: 11242 length 871 - Stream: column 2 section DATA start: 12113 length 362 - Stream: column 2 section LENGTH start: 12475 length 8 - Stream: column 2 section DICTIONARY_DATA start: 12483 length 23 - Stream: column 3 section DATA start: 12506 length 5167 - Stream: column 4 section DATA start: 17673 length 524 - Stream: column 4 section SECONDARY start: 18197 length 118 - Stream: column 5 section DATA start: 18315 length 2913 - Stream: column 5 section SECONDARY start: 21228 length 118 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 + Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498 + Stream: column 0 section ROW_INDEX start: 6511 length 17 + Stream: column 1 section ROW_INDEX start: 6528 length 83 + Stream: column 2 section ROW_INDEX start: 6611 length 81 + Stream: column 3 section ROW_INDEX start: 6692 length 111 + Stream: column 4 section ROW_INDEX start: 6803 length 110 + Stream: column 5 section ROW_INDEX start: 6913 length 96 + Stream: column 1 section DATA start: 7009 length 45 + Stream: column 2 section DATA start: 7054 length 41 + Stream: column 2 section LENGTH start: 7095 length 
8 + Stream: column 2 section DICTIONARY_DATA start: 7103 length 23 + Stream: column 3 section DATA start: 7126 length 5167 + Stream: column 4 section DATA start: 12293 length 524 + Stream: column 4 section SECONDARY start: 12817 length 18 + Stream: column 5 section DATA start: 12835 length 53 + Stream: column 5 section SECONDARY start: 12888 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -296,12 +296,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 21814 bytes +File length: 13369 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP --
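Editor's note (illustrative sketch, not part of the patch): the DATA stream lengths asserted by the new RLEv2 tests earlier in this diff -- 60 bytes for the ten fixed-delta runs, 2 bytes for the short repeat, 642 bytes for the direct-encoded run -- follow from the RLEv2 header sizes plus standard zigzag/base-128 varint arithmetic spelled out in the test comments. The class and helper names below are hypothetical and are not Hive APIs; the snippet only reproduces that arithmetic under those assumptions.

public class Rlev2SizeSketch {

  // Standard 64-bit zigzag: maps small-magnitude signed values to small unsigned values.
  static long zigZag(long v) {
    return (v << 1) ^ (v >> 63);
  }

  // Bytes needed by a base-128 varint (7 payload bits per byte) for an unsigned value.
  static int varIntBytes(long unsignedValue) {
    int bytes = 1;
    while ((unsignedValue >>>= 7) != 0) {
      bytes++;
    }
    return bytes;
  }

  public static void main(String[] args) {
    // DELTA with a fixed delta (testFixedDeltaLargeDescending): each 512-value run starts at
    // 512 and steps by 99, so per run: 2-byte header + zigzag/varint base + zigzag/varint delta.
    int deltaRun = 2 + varIntBytes(zigZag(512)) + varIntBytes(zigZag(99)); // 2 + 2 + 2 = 6
    System.out.println("fixed-delta DATA length = " + 10 * deltaRun);      // 10 runs -> 60

    // SHORT_REPEAT (testShortRepeat): 1 header byte + 1 byte for the repeated value 10.
    System.out.println("short-repeat DATA length = " + (1 + 1));           // 2

    // DIRECT (testDeltaUnknownSign): 2-byte header + 512 values bit packed at 10 bits each.
    System.out.println("direct DATA length = " + (2 + 512 * 10 / 8));      // 2 + 640 = 642
  }
}

Running the sketch prints 60, 2 and 642, matching the "Stream: column 0 section DATA start: 3 length ..." assertions in the tests above.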