diff --git llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java index 42129b7511..88c1a4cb6b 100644 --- llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java +++ llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java @@ -18,12 +18,14 @@ package org.apache.hadoop.hive.llap.io.api; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.InputFormat; public interface LlapIo { - InputFormat getInputFormat(InputFormat sourceInputFormat, Deserializer serde); + InputFormat getInputFormat( + InputFormat sourceInputFormat, Deserializer serde, Configuration conf); void close(); String getMemoryInfo(); } diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java index 294fb2b41f..eff5264f62 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java @@ -77,6 +77,7 @@ // TODO: later, we may have a map private final ColumnVectorProducer orcCvp, genericCvp; + private final ColumnVectorProducer acidOrcCvp; private final ExecutorService executor; private final LlapDaemonCacheMetrics cacheMetrics; private final LlapDaemonIOMetrics ioMetrics; @@ -185,9 +186,11 @@ private LlapIoImpl(Configuration conf) throws IOException { new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build()); // TODO: this should depends on input format and be in a map, or something. this.orcCvp = new OrcColumnVectorProducer( - metadataCache, cache, bufferManager, conf, cacheMetrics, ioMetrics); + metadataCache, cache, bufferManager, conf, cacheMetrics, ioMetrics, false); this.genericCvp = isEncodeEnabled ? new GenericColumnVectorProducer( serdeCache, bufferManager, conf, cacheMetrics, ioMetrics) : null; + this.acidOrcCvp = new OrcColumnVectorProducer( + metadataCache, cache, bufferManager, conf, cacheMetrics, ioMetrics, true); LOG.info("LLAP IO initialized"); registerMXBeans(); @@ -208,10 +211,15 @@ public String getMemoryInfo() { @SuppressWarnings("rawtypes") @Override public InputFormat getInputFormat( - InputFormat sourceInputFormat, Deserializer sourceSerDe) { + InputFormat sourceInputFormat, Deserializer sourceSerDe, Configuration conf) { ColumnVectorProducer cvp = genericCvp; if (sourceInputFormat instanceof OrcInputFormat) { - cvp = orcCvp; // Special-case for ORC. + OrcInputFormat orcInputFormat = (OrcInputFormat) sourceInputFormat; + if (orcInputFormat.isAcidRead(conf)) { + cvp = acidOrcCvp; // Special case for ACID ORC. + } else { + cvp = orcCvp; // Special case for non-ACID ORC. + } } else if (cvp == null) { LOG.warn("LLAP encode is disabled; cannot use for " + sourceInputFormat.getClass()); return null; diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidEncodedDataConsumer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidEncodedDataConsumer.java new file mode 100644 index 0000000000..04d02f21fe --- /dev/null +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidEncodedDataConsumer.java @@ -0,0 +1,147 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.io.decode; + +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; +import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch; +import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger; +import org.apache.hadoop.hive.ql.io.orc.OrcSplit; +import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader; +import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer; +import org.apache.hadoop.hive.ql.io.orc.encoded.Reader; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; + +import java.io.IOException; + +import static org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.findRecordsWithInvalidTransactionIds; + +/** + * OrcAcidEncodedDataConsumer consumes data after merging the base, delta, and delete delta files. + */ +public class OrcAcidEncodedDataConsumer extends OrcEncodedDataConsumer implements ReadPipeline { + private final InnerConsumer innerConsumer = new InnerConsumer(); + private final JobConf conf; + private final FileSplit split; + + public OrcAcidEncodedDataConsumer( + Consumer consumer, int size, boolean skipCorrupt, + QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics, + JobConf conf, FileSplit split) throws IOException { + super(consumer, size, skipCorrupt, counters, ioMetrics); + this.split = split; + this.conf = conf; + } + + @Override + protected void decodeBatch(Reader.OrcEncodedColumnBatch batch, + Consumer downstreamConsumer) { + innerConsumer.downstreamConsumer = downstreamConsumer; + super.decodeBatch(batch, innerConsumer); + } + + private class InnerConsumer implements Consumer { + Consumer downstreamConsumer; + VectorizedOrcAcidRowBatchReader.DeleteEventRegistry deleteEventRegistry; + + InnerConsumer() { + // Clone readerOptions for deleteEvents. + Reader.Options readerOptions = OrcInputFormat.createOptionsForReader(conf); + readerOptions = OrcRawRecordMerger.createEventOptions(readerOptions); + Reader.Options deleteEventReaderOptions = readerOptions.clone(); + // Set the range on the deleteEventReaderOptions to 0 to Long.MAX_VALUE because + // we always want to read all the delete delta files. + deleteEventReaderOptions.range(0, Long.MAX_VALUE); + // Disable SARGs for deleteEventReaders, as SARGs have no meaning. + deleteEventReaderOptions.searchArgument(null, null); + OrcSplit orcSplit = (OrcSplit) split; + + try { + // See if we can load all the delete events from all the delete deltas in memory...
+ deleteEventRegistry = VectorizedOrcAcidRowBatchReader.ColumnizedDeleteEventRegistry.get( + conf, orcSplit, deleteEventReaderOptions); + if (deleteEventRegistry == null) { + // If not, then create a set of hanging readers that do sort-merge to find the next smallest + // delete event on-demand. Caps the memory consumption to (some_const * no. of readers). + deleteEventRegistry = new VectorizedOrcAcidRowBatchReader.SortMergedDeleteEventRegistry( + conf, orcSplit, deleteEventReaderOptions); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void consumeData(ColumnVectorBatch data) { + final int[] selected = new int[data.size]; + int size = data.size; + + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + ValidTxnList validTxnList = (txnString == null) ? new ValidReadTxnList() : new ValidReadTxnList(txnString); + + // Case 1- find rows which belong to transactions that are not valid. + size = findRecordsWithInvalidTransactionIds(data.cols, size, selected, validTxnList); + + // Case 2- find rows which have been deleted. + try { + size = deleteEventRegistry.findDeletedRecords(data.cols, size, selected); + } catch (IOException e) { + throw new RuntimeException(e); + } + + // Select only not deleted ones + if (size != data.size) { + data.size = size; + for (int i = 0; i < size; i++) { + final int setBitIndex = selected[i]; + for (ColumnVector columnVector : data.cols) { + columnVector.setElement(i, setBitIndex, columnVector); + } + } + } + + downstreamConsumer.consumeData(data); + } + + @Override + public void setDone() { + downstreamConsumer.setDone(); + try { + deleteEventRegistry.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void setError(Throwable t) { + downstreamConsumer.setError(t); + try { + deleteEventRegistry.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } +} diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java index 121e169fc6..bf07e3e2e6 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java @@ -24,7 +24,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.llap.cache.BufferUsageManager; import org.apache.hadoop.hive.llap.cache.LowLevelCache; import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; @@ -34,7 +33,7 @@ import org.apache.hadoop.hive.llap.io.metadata.OrcMetadataCache; import org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics; import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader; import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -52,19 +51,22 @@ private final LowLevelCache lowLevelCache; private final BufferUsageManager bufferManager; private final Configuration conf; + private final boolean isAcid; private boolean _skipCorrupt; // TODO: get rid of this private LlapDaemonCacheMetrics cacheMetrics; private LlapDaemonIOMetrics ioMetrics; public 
OrcColumnVectorProducer(OrcMetadataCache metadataCache, LowLevelCache lowLevelCache, BufferUsageManager bufferManager, - Configuration conf, LlapDaemonCacheMetrics cacheMetrics, LlapDaemonIOMetrics ioMetrics) { + Configuration conf, LlapDaemonCacheMetrics cacheMetrics, + LlapDaemonIOMetrics ioMetrics, boolean isAcid) { LlapIoImpl.LOG.info("Initializing ORC column vector producer"); this.metadataCache = metadataCache; this.lowLevelCache = lowLevelCache; this.bufferManager = bufferManager; this.conf = conf; + this.isAcid = isAcid; this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf); this.cacheMetrics = cacheMetrics; this.ioMetrics = ioMetrics; @@ -76,12 +78,21 @@ public ReadPipeline createReadPipeline( SearchArgument sarg, String[] columnNames, QueryFragmentCounters counters, TypeDescription readerSchema, InputFormat unused0, Deserializer unused1, Reporter reporter, JobConf job, Map unused2) throws IOException { + + final OrcEncodedDataConsumer edc; + final OrcEncodedDataReader reader; cacheMetrics.incrCacheReadRequests(); - OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, columnIds.size(), - _skipCorrupt, counters, ioMetrics); - OrcEncodedDataReader reader = new OrcEncodedDataReader( - lowLevelCache, bufferManager, metadataCache, conf, job, split, columnIds, sarg, - columnNames, edc, counters, readerSchema); + + if (isAcid && VectorizedOrcAcidRowBatchReader.isAcid(job, split)) { + edc = new OrcAcidEncodedDataConsumer( + consumer, columnIds.size(), _skipCorrupt, counters, ioMetrics, job, split); + } else { + edc = new OrcEncodedDataConsumer( + consumer, columnIds.size(), _skipCorrupt, counters, ioMetrics); + } + reader = new OrcEncodedDataReader( + lowLevelCache, bufferManager, metadataCache, conf, job, split, + columnIds, sarg, columnNames, edc, counters, readerSchema); edc.init(reader, reader); return edc; } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 21394c6aab..f7646c1225 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -287,7 +287,7 @@ public void configure(JobConf job) { throw new HiveException("Error creating SerDe for LLAP IO", e); } } - InputFormat wrappedIf = llapIo.getInputFormat(inputFormat, serde); + InputFormat wrappedIf = llapIo.getInputFormat(inputFormat, serde, conf); if (wrappedIf == null) { return inputFormat; // We cannot wrap; the cause is logged inside. 
} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 0ef7c758d4..4c024f1a6a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -209,6 +209,10 @@ public boolean isAcidRead(Configuration conf, InputSplit inputSplit) { return HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); } + public boolean isAcidRead(Configuration conf) { + return HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); + } + private static class OrcRecordReader implements org.apache.hadoop.mapred.RecordReader, StatsProvidingRecordReader { @@ -1842,8 +1846,7 @@ private static void scheduleSplits(ETLSplitStrategy splitStrategy, Context conte reporter.setStatus(inputSplit.toString()); - boolean isFastVectorizedReaderAvailable = - VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, inputSplit); + boolean isFastVectorizedReaderAvailable = VectorizedOrcAcidRowBatchReader.isAcid(conf, inputSplit); if (vectorMode && isFastVectorizedReaderAvailable) { // Faster vectorized ACID row batch reader is available that avoids row-by-row stitching. @@ -2024,7 +2027,7 @@ static Path findOriginalBucket(FileSystem fs, directory); } - static Reader.Options createOptionsForReader(Configuration conf) { + public static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? */ diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java index 95b8806e70..de0f0bc377 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java @@ -399,7 +399,7 @@ private void discoverKeyBounds(Reader reader, * @param options options for the row reader * @return a cloned options object that is modified for the event reader */ - static Reader.Options createEventOptions(Reader.Options options) { + public static Reader.Options createEventOptions(Reader.Options options) { Reader.Options result = options.clone(); result.range(options.getOffset(), Long.MAX_VALUE); result.include(options.getInclude()); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index d61b24bef3..eb6231f464 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -218,7 +218,7 @@ public long getColumnarProjectionSize() { @Override public boolean canUseLlapIo() { - return isOriginal && (deltas == null || deltas.isEmpty()); + return isOriginal; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index 75c7680e26..9bbc1bd51a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -20,8 +20,6 @@ import java.io.IOException; import java.util.Arrays; -import java.util.BitSet; -import java.util.List; import java.util.Map.Entry; import java.util.TreeMap; @@ -32,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import 
org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -42,15 +41,13 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; -import org.apache.orc.OrcProto; -import org.apache.orc.OrcUtils; -import org.apache.orc.TypeDescription; + +import com.google.common.annotations.VisibleForTesting; import org.apache.orc.impl.AcidStats; import org.apache.orc.impl.OrcAcidUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; /** * A fast vectorized batch reader class for ACID when split-update behavior is enabled. * When split-update is turned on, row-by-row stitching could be avoided to create the final @@ -131,10 +128,10 @@ public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf, deleteEventReaderOptions.range(0, Long.MAX_VALUE); // Disable SARGs for deleteEventReaders, as SARGs have no meaning. deleteEventReaderOptions.searchArgument(null, null); - try { - // See if we can load all the delete events from all the delete deltas in memory... - this.deleteEventRegistry = new ColumnizedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions); - } catch (DeleteEventsOverflowMemoryException e) { + + // See if we can load all the delete events from all the delete deltas in memory... + this.deleteEventRegistry = ColumnizedDeleteEventRegistry.get(conf, orcSplit, deleteEventReaderOptions); + if (this.deleteEventRegistry == null) { // If not, then create a set of hanging readers that do sort-merge to find the next smallest // delete event on-demand. Caps the memory consumption to (some_const * no. of readers). this.deleteEventRegistry = new SortMergedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions); @@ -147,7 +144,7 @@ public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf, * @param inputSplit * @return true if it is possible, else false. */ - public static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf, InputSplit inputSplit) { + public static boolean isAcid(JobConf conf, InputSplit inputSplit) { if (!(inputSplit instanceof OrcSplit)) { return false; // must be an instance of OrcSplit. } @@ -163,7 +160,7 @@ public static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf, return false; // no split-update or possibly reading originals! } - private static Path[] getDeleteDeltaDirsFromSplit(OrcSplit orcSplit) throws IOException { + public static Path[] getDeleteDeltaDirsFromSplit(OrcSplit orcSplit) throws IOException { Path path = orcSplit.getPath(); Path root; if (orcSplit.hasBase()) { @@ -206,42 +203,37 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti // We will go through the batch to discover rows which match any of the cases and specifically // remove them from the selected vector. Of course, selectedInUse should also be set. - BitSet selectedBitSet = new BitSet(vectorizedRowBatchBase.size); + int size = vectorizedRowBatchBase.size; + final int[] selected = value.selected; if (vectorizedRowBatchBase.selectedInUse) { // When selectedInUse is true, start with every bit set to false and selectively set // certain bits to true based on the selected[] vector. 
- selectedBitSet.set(0, vectorizedRowBatchBase.size, false); - for (int j = 0; j < vectorizedRowBatchBase.size; ++j) { - int i = vectorizedRowBatchBase.selected[j]; - selectedBitSet.set(i); - } + System.arraycopy( + vectorizedRowBatchBase.selected, 0, selected, 0, size); } else { - // When selectedInUse is set to false, everything in the batch is selected. - selectedBitSet.set(0, vectorizedRowBatchBase.size, true); + for (int i = 0; i < size; i++) { + selected[i] = i; + } } // Case 1- find rows which belong to transactions that are not valid. - findRecordsWithInvalidTransactionIds(vectorizedRowBatchBase, selectedBitSet); + size = findRecordsWithInvalidTransactionIds( + vectorizedRowBatchBase.cols, size, selected, validTxnList); // Case 2- find rows which have been deleted. - this.deleteEventRegistry.findDeletedRecords(vectorizedRowBatchBase, selectedBitSet); + size = this.deleteEventRegistry.findDeletedRecords( + vectorizedRowBatchBase.cols, size, selected); - if (selectedBitSet.cardinality() == vectorizedRowBatchBase.size) { + if (size == vectorizedRowBatchBase.size) { // None of the cases above matched and everything is selected. Hence, we will use the // same values for the selected and selectedInUse. value.size = vectorizedRowBatchBase.size; value.selected = vectorizedRowBatchBase.selected; value.selectedInUse = vectorizedRowBatchBase.selectedInUse; } else { - value.size = selectedBitSet.cardinality(); + value.size = size; value.selectedInUse = true; - value.selected = new int[selectedBitSet.cardinality()]; - // This loop fills up the selected[] vector with all the index positions that are selected. - for (int setBitIndex = selectedBitSet.nextSetBit(0), selectedItr = 0; - setBitIndex >= 0; - setBitIndex = selectedBitSet.nextSetBit(setBitIndex+1), ++selectedItr) { - value.selected[selectedItr] = setBitIndex; - } + value.selected = selected; } // Finally, link up the columnVector from the base VectorizedRowBatch to outgoing batch. @@ -257,28 +249,31 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti return true; } - private void findRecordsWithInvalidTransactionIds(VectorizedRowBatch batch, BitSet selectedBitSet) { - if (batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION].isRepeating) { + public static int findRecordsWithInvalidTransactionIds( + ColumnVector[] cols, int size, int[] selected, ValidTxnList validTxnList) { + + if (cols[OrcRecordUpdater.CURRENT_TRANSACTION].isRepeating) { // When we have repeating values, we can unset the whole bitset at once // if the repeating value is not a valid transaction. long currentTransactionIdForBatch = ((LongColumnVector) - batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector[0]; + cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector[0]; if (!validTxnList.isTxnValid(currentTransactionIdForBatch)) { - selectedBitSet.clear(0, batch.size); + return 0; } - return; + return size; } long[] currentTransactionVector = - ((LongColumnVector) batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector; + ((LongColumnVector) cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector; // Loop through the bits that are set to true and mark those rows as false, if their // current transactions are not valid. 
- for (int setBitIndex = selectedBitSet.nextSetBit(0); - setBitIndex >= 0; - setBitIndex = selectedBitSet.nextSetBit(setBitIndex+1)) { - if (!validTxnList.isTxnValid(currentTransactionVector[setBitIndex])) { - selectedBitSet.clear(setBitIndex); + int j = 0; + for (int i = 0; i < size; i++) { + final int setBitIndex = selected[i]; + if (validTxnList.isTxnValid(currentTransactionVector[setBitIndex])) { + selected[j++] = setBitIndex; } - } + } + return j; } @Override @@ -321,22 +316,24 @@ DeleteEventRegistry getDeleteEventRegistry() { * will read the delete delta files and will create their own internal * data structures to maintain record ids of the records that got deleted. */ - static interface DeleteEventRegistry { + public interface DeleteEventRegistry { /** * Modifies the passed bitset to indicate which of the rows in the batch * have been deleted. Assumes that the batch.size is equal to bitset size. - * @param batch - * @param selectedBitSet + * @param cols + * @param size + * @param selected * @throws IOException */ - public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) throws IOException; + public int findDeletedRecords( + ColumnVector[] cols, int size, int[] selected) throws IOException; /** * The close() method can be called externally to signal the implementing classes * to free up resources. * @throws IOException */ - public void close() throws IOException; + void close() throws IOException; } /** @@ -346,7 +343,7 @@ DeleteEventRegistry getDeleteEventRegistry() { * amount of memory usage, given the number of delete delta files. Therefore, this * implementation will be picked up when the memory pressure is high. */ - static class SortMergedDeleteEventRegistry implements DeleteEventRegistry { + public static class SortMergedDeleteEventRegistry implements DeleteEventRegistry { private OrcRawRecordMerger deleteRecords; private OrcRawRecordMerger.ReaderKey deleteRecordKey; private OrcStruct deleteRecordValue; @@ -375,36 +372,36 @@ public SortMergedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, Reader.Opt } @Override - public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) + public int findDeletedRecords(ColumnVector[] cols, int size, int[] selected) throws IOException { if (!isDeleteRecordAvailable) { - return; + return size; } long[] originalTransaction = - batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; + cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; long[] bucket = - batch.cols[OrcRecordUpdater.BUCKET].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.BUCKET]).vector; + cols[OrcRecordUpdater.BUCKET].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector; long[] rowId = - batch.cols[OrcRecordUpdater.ROW_ID].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector; + cols[OrcRecordUpdater.ROW_ID].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector; // The following repeatedX values will be set, if any of the columns are repeating. long repeatedOriginalTransaction = (originalTransaction != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; long repeatedBucket = (bucket != null) ? 
-1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.BUCKET]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector[0]; long repeatedRowId = (rowId != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector[0]; // Get the first valid row in the batch still available. - int firstValidIndex = selectedBitSet.nextSetBit(0); - if (firstValidIndex == -1) { - return; // Everything in the batch has already been filtered out. + if (size == 0) { + return size; // Everything in the batch has already been filtered out. } + final int firstValidIndex = selected[0]; RecordIdentifier firstRecordIdInBatch = new RecordIdentifier( originalTransaction != null ? originalTransaction[firstValidIndex] : repeatedOriginalTransaction, @@ -412,7 +409,7 @@ public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) rowId != null ? (int) rowId[firstValidIndex] : repeatedRowId); // Get the last valid row in the batch still available. - int lastValidIndex = selectedBitSet.previousSetBit(batch.size - 1); + final int lastValidIndex = selected[size - 1]; RecordIdentifier lastRecordIdInBatch = new RecordIdentifier( originalTransaction != null ? originalTransaction[lastValidIndex] : repeatedOriginalTransaction, @@ -423,40 +420,45 @@ public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) // deleteRecord >= firstRecordInBatch or until we exhaust all the delete records. while (deleteRecordKey.compareRow(firstRecordIdInBatch) == -1) { isDeleteRecordAvailable = deleteRecords.next(deleteRecordKey, deleteRecordValue); - if (!isDeleteRecordAvailable) return; // exhausted all delete records, return. + if (!isDeleteRecordAvailable) return size; // exhausted all delete records, return. } // If we are here, then we have established that firstRecordInBatch <= deleteRecord. // Now continue marking records which have been deleted until we reach the end of the batch // or we exhaust all the delete records. - int currIndex = firstValidIndex; RecordIdentifier currRecordIdInBatch = new RecordIdentifier(); - while (isDeleteRecordAvailable && currIndex != -1 && currIndex <= lastValidIndex) { + int j = 0; + for (int i = 0; i < size; i++) { + if (!isDeleteRecordAvailable) { + break; + } + + final int currIndex = selected[i]; currRecordIdInBatch.setValues( (originalTransaction != null) ? originalTransaction[currIndex] : repeatedOriginalTransaction, (bucket != null) ? (int) bucket[currIndex] : (int) repeatedBucket, (rowId != null) ? rowId[currIndex] : repeatedRowId); - if (deleteRecordKey.compareRow(currRecordIdInBatch) == 0) { - // When deleteRecordId == currRecordIdInBatch, this record in the batch has been deleted. - selectedBitSet.clear(currIndex); - currIndex = selectedBitSet.nextSetBit(currIndex + 1); // Move to next valid index. - } else if (deleteRecordKey.compareRow(currRecordIdInBatch) == 1) { - // When deleteRecordId > currRecordIdInBatch, we have to move on to look at the - // next record in the batch. - // But before that, can we short-circuit and skip the entire batch itself - // by checking if the deleteRecordId > lastRecordInBatch? - if (deleteRecordKey.compareRow(lastRecordIdInBatch) == 1) { - return; // Yay! We short-circuited, skip everything remaining in the batch and return. 
+ final int compared = deleteRecordKey.compareRow(currRecordIdInBatch); + if (compared != 0) { + selected[j++] = currIndex; + if (compared == 1) { + // When deleteRecordId > currRecordIdInBatch, we have to move on to look at the + // next record in the batch. + // But before that, can we short-circuit and skip the entire batch itself + // by checking if the deleteRecordId > lastRecordInBatch? + if (deleteRecordKey.compareRow(lastRecordIdInBatch) == 1) { + return j; // Yay! We short-circuited, skip everything remaining in the batch and return. + } + } else { + // We have deleteRecordId < currRecordIdInBatch, we must now move on to find + // next the larger deleteRecordId that can possibly match anything in the batch. + isDeleteRecordAvailable = deleteRecords.next(deleteRecordKey, deleteRecordValue); } - currIndex = selectedBitSet.nextSetBit(currIndex + 1); // Move to next valid index. - } else { - // We have deleteRecordId < currRecordIdInBatch, we must now move on to find - // next the larger deleteRecordId that can possibly match anything in the batch. - isDeleteRecordAvailable = deleteRecords.next(deleteRecordKey, deleteRecordValue); } } + return j; } @Override @@ -482,7 +484,7 @@ public void close() throws IOException { * heuristic that prevents creation of an instance of this class if the memory pressure is high. * The SortMergedDeleteEventRegistry is then the fallback method for such scenarios. */ - static class ColumnizedDeleteEventRegistry implements DeleteEventRegistry { + public static class ColumnizedDeleteEventRegistry implements DeleteEventRegistry { /** * A simple wrapper class to hold the (otid, rowId) pair. */ @@ -626,14 +628,22 @@ public int compareTo(CompressedOtid other) { private CompressedOtid compressedOtids[]; private ValidTxnList validTxnList; - public ColumnizedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, - Reader.Options readerOptions) throws IOException, DeleteEventsOverflowMemoryException { + private ColumnizedDeleteEventRegistry() { + } + + public static ColumnizedDeleteEventRegistry get( + JobConf conf, OrcSplit orcSplit, Reader.Options readerOptions) + throws IOException { + + ColumnizedDeleteEventRegistry registry = new ColumnizedDeleteEventRegistry(); + int bucket = AcidUtils.parseBaseOrDeltaBucketFilename(orcSplit.getPath(), conf).getBucket(); String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); - this.validTxnList = (txnString == null) ? new ValidReadTxnList() : new ValidReadTxnList(txnString); - this.sortMerger = new TreeMap(); - this.rowIds = null; - this.compressedOtids = null; + + registry.validTxnList = (txnString == null) ? new ValidReadTxnList() : new ValidReadTxnList(txnString); + registry.sortMerger = new TreeMap(); + registry.rowIds = null; + registry.compressedOtids = null; int maxEventsInMemory = HiveConf.getIntVar(conf, ConfVars.HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY); try { @@ -667,13 +677,13 @@ public ColumnizedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, + deleteDeltaDirs.toString() +". 
The max limit is currently set at " + maxEventsInMemory + " and can be changed by setting the Hive config variable " + ConfVars.HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY.varname); - throw new DeleteEventsOverflowMemoryException(); + return null; } DeleteReaderValue deleteReaderValue = new DeleteReaderValue(deleteDeltaReader, - readerOptions, bucket, validTxnList); + readerOptions, bucket, registry.validTxnList); DeleteRecordKey deleteRecordKey = new DeleteRecordKey(); if (deleteReaderValue.next(deleteRecordKey)) { - sortMerger.put(deleteRecordKey, deleteReaderValue); + registry.sortMerger.put(deleteRecordKey, deleteReaderValue); } else { deleteReaderValue.close(); } @@ -681,12 +691,13 @@ public ColumnizedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, } if (totalDeleteEventCount > 0) { // Initialize the rowId array when we have some delete events. - rowIds = new long[totalDeleteEventCount]; - readAllDeleteEventsFromDeleteDeltas(); + registry.rowIds = new long[totalDeleteEventCount]; + registry.readAllDeleteEventsFromDeleteDeltas(); } } - } catch(IOException|DeleteEventsOverflowMemoryException e) { - close(); // close any open readers, if there was some exception during initialization. + return registry; + } catch(IOException e) { + registry.close(); // close any open readers, if there was some exception during initialization. throw e; // rethrow the exception so that the caller can handle. } } @@ -775,33 +786,34 @@ private boolean isDeleted(long otid, long rowId) { } @Override - public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) + public int findDeletedRecords(ColumnVector[] cols, int size, int[] selected) throws IOException { if (rowIds == null || compressedOtids == null) { - return; + return size; } // Iterate through the batch and for each (otid, rowid) in the batch // check if it is deleted or not. long[] originalTransactionVector = - batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; + cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; long repeatedOriginalTransaction = (originalTransactionVector != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; long[] rowIdVector = - ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector; - - for (int setBitIndex = selectedBitSet.nextSetBit(0); - setBitIndex >= 0; - setBitIndex = selectedBitSet.nextSetBit(setBitIndex+1)) { - long otid = originalTransactionVector != null ? originalTransactionVector[setBitIndex] - : repeatedOriginalTransaction ; - long rowId = rowIdVector[setBitIndex]; - if (isDeleted(otid, rowId)) { - selectedBitSet.clear(setBitIndex); + ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector; + + int j = 0; + for (int i = 0; i < size; i++) { + final int setBitIndex = selected[i]; + final long otid = originalTransactionVector != null ? 
originalTransactionVector[setBitIndex] + : repeatedOriginalTransaction ; + final long rowId = rowIdVector[setBitIndex]; + if (!isDeleted(otid, rowId)) { + selected[j++] = setBitIndex; } - } + } + return j; } @Override @@ -815,8 +827,4 @@ public void close() throws IOException { } } } - - static class DeleteEventsOverflowMemoryException extends Exception { - private static final long serialVersionUID = 1L; - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 2120400d8b..2d4d7d4f16 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -258,7 +258,7 @@ public void deriveExplainAttributes() { } public void deriveLlap(Configuration conf, boolean isExecDriver) { - boolean hasLlap = false, hasNonLlap = false, hasAcid = false; + boolean hasLlap = false, hasNonLlap = false; // Assume the IO is enabled on the daemon by default. We cannot reasonably check it here. boolean isLlapOn = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENABLED, llapMode); boolean canWrapAny = false, doCheckIfs = false; @@ -278,12 +278,7 @@ public void deriveLlap(Configuration conf, boolean isExecDriver) { boolean isUsingLlapIo = HiveInputFormat.canWrapForLlap( part.getInputFileFormatClass(), doCheckIfs); if (isUsingLlapIo) { - if (part.getTableDesc() != null && - AcidUtils.isTablePropertyTransactional(part.getTableDesc().getProperties())) { - hasAcid = true; - } else { - hasLlap = true; - } + hasLlap = true; } else { hasNonLlap = true; } @@ -296,7 +291,7 @@ public void deriveLlap(Configuration conf, boolean isExecDriver) { } llapIoDesc = deriveLlapIoDescString( - isLlapOn, canWrapAny, hasPathToPartInfo, hasLlap, hasNonLlap, hasAcid); + isLlapOn, canWrapAny, hasPathToPartInfo, hasLlap, hasNonLlap); } private boolean checkVectorizerSupportedTypes(boolean hasLlap) { @@ -321,11 +316,10 @@ private boolean checkVectorizerSupportedTypes(boolean hasLlap) { } private static String deriveLlapIoDescString(boolean isLlapOn, boolean canWrapAny, - boolean hasPathToPartInfo, boolean hasLlap, boolean hasNonLlap, boolean hasAcid) { + boolean hasPathToPartInfo, boolean hasLlap, boolean hasNonLlap) { if (!isLlapOn) return null; // LLAP IO is off, don't output. if (!canWrapAny) return "no inputs"; // Cannot use with input formats. if (!hasPathToPartInfo) return "unknown"; // No information to judge. - if (hasAcid) return "may be used (ACID table)"; return (hasLlap ? (hasNonLlap ? "some inputs" : "all inputs") : "no inputs"); } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java index 6bf13129b8..1957f9d767 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java @@ -247,17 +247,17 @@ public void testCanCreateVectorizedAcidRowBatchReaderOnSplit() throws Exception conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getLegacy().toInt()); // Test false when trying to create a vectorized ACID row batch reader for a legacy table. 
- assertFalse(VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, mockSplit)); + assertFalse(VectorizedOrcAcidRowBatchReader.isAcid(conf, mockSplit)); conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt()); Mockito.when(mockSplit.isOriginal()).thenReturn(true); // Test false when trying to create a vectorized ACID row batch reader when reading originals. - assertFalse(VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, mockSplit)); + assertFalse(VectorizedOrcAcidRowBatchReader.isAcid(conf, mockSplit)); // A positive test case. Mockito.when(mockSplit.isOriginal()).thenReturn(false); - assertTrue(VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, mockSplit)); + assertTrue(VectorizedOrcAcidRowBatchReader.isAcid(conf, mockSplit)); } } diff --git ql/src/test/queries/clientpositive/llap_acid.q ql/src/test/queries/clientpositive/llap_acid.q index 6bd216a55f..57e40fa260 100644 --- ql/src/test/queries/clientpositive/llap_acid.q +++ ql/src/test/queries/clientpositive/llap_acid.q @@ -27,7 +27,7 @@ select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limi insert into table orc_llap partition (csmallint = 2) select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10; -alter table orc_llap SET TBLPROPERTIES ('transactional'='true'); +alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); insert into table orc_llap partition (csmallint = 3) select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10; @@ -51,3 +51,39 @@ select cint, csmallint, cbigint from orc_llap where cint is not null order by csmallint, cint; DROP TABLE orc_llap; + +DROP TABLE orc_llap_v2; + +CREATE TABLE orc_llap_v2 ( + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); + +insert into table orc_llap_v2 partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10; +insert into table orc_llap_v2 partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10; +insert into table orc_llap_v2 partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10; + +explain +select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order +by csmallint, cint; +select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order +by csmallint, cint; + +insert into table orc_llap_v2 partition (csmallint = 1) values (1, 1, 1, 1); + +update orc_llap_v2 set cbigint = 2 where cint = 1; + +explain +select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order +by csmallint, cint; +select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order +by csmallint, cint; + +DROP TABLE orc_llap_v2; diff --git ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out index 357ae7bdaf..0add12c99d 100644 --- ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out +++ ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out @@ -76,7 +76,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false Execution mode: llap - 
LLAP IO: may be used (ACID table) + LLAP IO: all inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index 78c907084a..3ba88fa2a6 100644 --- ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -65,7 +65,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: ROW__ID (type: struct) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Map 7 Map Operator Tree: TableScan @@ -336,7 +336,7 @@ STAGE PLANS: Map-reduce partition columns: a (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Map 4 Map Operator Tree: TableScan @@ -526,7 +526,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: ROW__ID (type: struct) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Map 9 Map Operator Tree: TableScan @@ -991,7 +991,7 @@ POSTHOOK: Input: type2_scd_helper@customer #### A masked pattern was here #### 1 ABC Abc Co. OH true NULL 2 DEF Def Co. PA true NULL -3 XYZ Xyz Co. CA false 2017-03-15 +3 XYZ Xyz Co. CA false 2017-03-17 3 XYZ Xyz Co. TX true NULL 4 PDQ Pdq Co. WI true NULL PREHOOK: query: drop table customer diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index 788854aa04..6799d69b74 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -106,7 +106,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -200,7 +200,7 @@ STAGE PLANS: Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col3 (type: string) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -386,7 +386,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -479,7 +479,7 @@ STAGE PLANS: Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -675,7 +675,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -770,7 +770,7 @@ STAGE PLANS: Statistics: Num rows: 1600 Data 
size: 556800 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col4 (type: int) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -894,7 +894,7 @@ STAGE PLANS: Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), _col2 (type: int) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1087,7 +1087,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1181,7 +1181,7 @@ STAGE PLANS: Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int) Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1305,7 +1305,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1500,7 +1500,7 @@ STAGE PLANS: Statistics: Num rows: 800 Data size: 280800 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), 'bar' (type: string) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1596,7 +1596,7 @@ STAGE PLANS: Statistics: Num rows: 1600 Data size: 561600 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), 'bar' (type: string) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/sqlmerge.q.out ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 562d4440ba..69ee0dff14 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -60,7 +60,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: ROW__ID (type: struct) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Map 7 Map Operator Tree: TableScan @@ -301,7 +301,7 @@ STAGE PLANS: Map-reduce partition columns: a (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Map 4 Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/llap_acid.q.out ql/src/test/results/clientpositive/llap_acid.q.out index 5970fd78cb..9a77811d0d 100644 --- ql/src/test/results/clientpositive/llap_acid.q.out +++ ql/src/test/results/clientpositive/llap_acid.q.out @@ -50,11 +50,11 @@ POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc) POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cfloat SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
 POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
-PREHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true')
+PREHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')
 PREHOOK: type: ALTERTABLE_PROPERTIES
 PREHOOK: Input: default@orc_llap
 PREHOOK: Output: default@orc_llap
-POSTHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true')
+POSTHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')
 POSTHOOK: type: ALTERTABLE_PROPERTIES
 POSTHOOK: Input: default@orc_llap
 POSTHOOK: Output: default@orc_llap
@@ -105,7 +105,7 @@ STAGE PLANS:
                   Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col2 (type: bigint)
       Execution mode: vectorized
-      LLAP IO: may be used (ACID table)
+      LLAP IO: all inputs
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint)
@@ -230,7 +230,7 @@ STAGE PLANS:
                   Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col2 (type: bigint)
       Execution mode: vectorized
-      LLAP IO: may be used (ACID table)
+      LLAP IO: all inputs
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint)
@@ -303,3 +303,302 @@ POSTHOOK: query: DROP TABLE orc_llap
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@orc_llap
 POSTHOOK: Output: default@orc_llap
+PREHOOK: query: DROP TABLE orc_llap_v2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orc_llap_v2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orc_llap_v2 (
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE)
+partitioned by (csmallint smallint)
+clustered by (cint) into 2 buckets stored as orc
+TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_llap_v2
+POSTHOOK: query: CREATE TABLE orc_llap_v2 (
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE)
+partitioned by (csmallint smallint)
+clustered by (cint) into 2 buckets stored as orc
+TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_llap_v2
+PREHOOK: query: insert into table orc_llap_v2 partition (csmallint = 1)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap_v2@csmallint=1
+POSTHOOK: query: insert into table orc_llap_v2 partition (csmallint = 1)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap_v2@csmallint=1
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=1).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=1).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=1).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert into table orc_llap_v2 partition (csmallint = 2)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap_v2@csmallint=2
+POSTHOOK: query: insert into table orc_llap_v2 partition (csmallint = 2)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap_v2@csmallint=2
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert into table orc_llap_v2 partition (csmallint = 3)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap_v2@csmallint=3
+POSTHOOK: query: insert into table orc_llap_v2 partition (csmallint = 3)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap_v2@csmallint=3
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=3).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=3).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=3).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orc_llap_v2
+            filterExpr: cint is not null (type: boolean)
+            Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: cint is not null (type: boolean)
+              Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: smallint), _col0 (type: int)
+                  sort order: ++
+                  Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: bigint)
+      Execution mode: vectorized
+      LLAP IO: all inputs
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 468 Data size: 5641 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap_v2
+PREHOOK: Input: default@orc_llap_v2@csmallint=1
+PREHOOK: Input: default@orc_llap_v2@csmallint=2
+PREHOOK: Input: default@orc_llap_v2@csmallint=3
+#### A masked pattern was here ####
+POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap_v2
+POSTHOOK: Input: default@orc_llap_v2@csmallint=1
+POSTHOOK: Input: default@orc_llap_v2@csmallint=2
+POSTHOOK: Input: default@orc_llap_v2@csmallint=3
+#### A masked pattern was here ####
+-285355633 1 -1241163445
+-109813638 1 -58941842
+164554497 1 1161977292
+199879534 1 123351087
+246423894 1 -1645852809
+354670578 1 562841852
+455419170 1 1108177470
+665801232 1 480783141
+708885482 1 -1645852809
+-285355633 2 -1241163445
+-109813638 2 -58941842
+164554497 2 1161977292
+199879534 2 123351087
+246423894 2 -1645852809
+354670578 2 562841852
+455419170 2 1108177470
+665801232 2 480783141
+708885482 2 -1645852809
+-923308739 3 -1887561756
+-3728 3 -1887561756
+762 3 -1645852809
+6981 3 -1887561756
+253665376 3 NULL
+497728223 3 -1887561756
+528534767 3 NULL
+528534767 3 NULL
+528534767 3 NULL
+528534767 3 NULL
+PREHOOK: query: insert into table orc_llap_v2 partition (csmallint = 1) values (1, 1, 1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_llap_v2@csmallint=1
+POSTHOOK: query: insert into table orc_llap_v2 partition (csmallint = 1) values (1, 1, 1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_llap_v2@csmallint=1
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=1).cbigint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=1).cdouble EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=1).cfloat EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap_v2 PARTITION(csmallint=1).cint EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: update orc_llap_v2 set cbigint = 2 where cint = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap_v2
+PREHOOK: Input: default@orc_llap_v2@csmallint=1
+PREHOOK: Input: default@orc_llap_v2@csmallint=2
+PREHOOK: Input: default@orc_llap_v2@csmallint=3
+PREHOOK: Output: default@orc_llap_v2@csmallint=1
+PREHOOK: Output: default@orc_llap_v2@csmallint=2
+PREHOOK: Output: default@orc_llap_v2@csmallint=3
+POSTHOOK: query: update orc_llap_v2 set cbigint = 2 where cint = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap_v2
+POSTHOOK: Input: default@orc_llap_v2@csmallint=1
+POSTHOOK: Input: default@orc_llap_v2@csmallint=2
+POSTHOOK: Input: default@orc_llap_v2@csmallint=3
+POSTHOOK: Output: default@orc_llap_v2@csmallint=1
+POSTHOOK: Output: default@orc_llap_v2@csmallint=2
+POSTHOOK: Output: default@orc_llap_v2@csmallint=3
+PREHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orc_llap_v2
+            filterExpr: cint is not null (type: boolean)
+            Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: cint is not null (type: boolean)
+              Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: smallint), _col0 (type: int)
+                  sort order: ++
+                  Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: bigint)
+      Execution mode: vectorized
+      LLAP IO: all inputs
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 647 Data size: 7780 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap_v2
+PREHOOK: Input: default@orc_llap_v2@csmallint=1
+PREHOOK: Input: default@orc_llap_v2@csmallint=2
+PREHOOK: Input: default@orc_llap_v2@csmallint=3
+#### A masked pattern was here ####
+POSTHOOK: query: select cint, csmallint, cbigint from orc_llap_v2 where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap_v2
+POSTHOOK: Input: default@orc_llap_v2@csmallint=1
+POSTHOOK: Input: default@orc_llap_v2@csmallint=2
+POSTHOOK: Input: default@orc_llap_v2@csmallint=3
+#### A masked pattern was here ####
+-285355633 1 -1241163445
+-109813638 1 -58941842
+1 1 2
+164554497 1 1161977292
+199879534 1 123351087
+246423894 1 -1645852809
+354670578 1 562841852
+455419170 1 1108177470
+665801232 1 480783141
+708885482 1 -1645852809
+-285355633 2 -1241163445
+-109813638 2 -58941842
+164554497 2 1161977292
+199879534 2 123351087
+246423894 2 -1645852809
+354670578 2 562841852
+455419170 2 1108177470
+665801232 2 480783141
+708885482 2 -1645852809
+-923308739 3 -1887561756
+-3728 3 -1887561756
+762 3 -1645852809
+6981 3 -1887561756
+253665376 3 NULL
+497728223 3 -1887561756
+528534767 3 NULL
+528534767 3 NULL
+528534767 3 NULL
+528534767 3 NULL
+PREHOOK: query: DROP TABLE orc_llap_v2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_llap_v2
+PREHOOK: Output: default@orc_llap_v2
+POSTHOOK: query: DROP TABLE orc_llap_v2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_llap_v2
+POSTHOOK: Output: default@orc_llap_v2