diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java index 7c309a4..2da49aa 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java @@ -23,14 +23,13 @@ import java.util.Arrays; import java.util.List; import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import javax.management.ObjectName; import org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool; +import org.apache.hadoop.hive.llap.io.decode.OrcAcidColumnVectorProducer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -66,8 +65,6 @@ import org.apache.hadoop.metrics2.util.MBeans; import com.google.common.primitives.Ints; -import com.google.common.util.concurrent.ListeningExecutorService; -import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.ThreadFactoryBuilder; public class LlapIoImpl implements LlapIo { @@ -80,13 +77,16 @@ // TODO: later, we may have a map private final ColumnVectorProducer orcCvp, genericCvp; + private final ColumnVectorProducer acidOrcCvp; private final ExecutorService executor; private final LlapDaemonCacheMetrics cacheMetrics; private final LlapDaemonIOMetrics ioMetrics; private ObjectName buddyAllocatorMXBean; private final Allocator allocator; + private final Configuration conf; private LlapIoImpl(Configuration conf) throws IOException { + this.conf = conf; String ioMode = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MODE); boolean useLowLevelCache = LlapIoImpl.MODE_CACHE.equalsIgnoreCase(ioMode); LOG.info("Initializing LLAP IO in {} mode", useLowLevelCache ? LlapIoImpl.MODE_CACHE : "none"); @@ -162,6 +162,8 @@ private LlapIoImpl(Configuration conf) throws IOException { metadataCache, cache, bufferManager, conf, cacheMetrics, ioMetrics); this.genericCvp = isEncodeEnabled ? new GenericColumnVectorProducer( serdeCache, bufferManager, conf, cacheMetrics, ioMetrics) : null; + this.acidOrcCvp = new OrcAcidColumnVectorProducer( + metadataCache, cache, bufferManager, conf, cacheMetrics, ioMetrics); LOG.info("LLAP IO initialized"); registerMXBeans(); @@ -177,7 +179,12 @@ private void registerMXBeans() { InputFormat sourceInputFormat, Deserializer sourceSerDe) { ColumnVectorProducer cvp = genericCvp; if (sourceInputFormat instanceof OrcInputFormat) { - cvp = orcCvp; // Special-case for ORC. + OrcInputFormat orcInputFormat = (OrcInputFormat) sourceInputFormat; + if (orcInputFormat.isAcidRead(conf)) { + cvp = acidOrcCvp; // Special case for ACID ORC. + } else { + cvp = orcCvp; // Special case for non-ACID ORC. + } } else if (cvp == null) { LOG.warn("LLAP encode is disabled; cannot use for " + sourceInputFormat.getClass()); return null; diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidColumnVectorProducer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidColumnVectorProducer.java new file mode 100644 index 0000000..a255448 --- /dev/null +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidColumnVectorProducer.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.io.decode; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.llap.cache.BufferUsageManager; +import org.apache.hadoop.hive.llap.cache.LowLevelCache; +import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; +import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch; +import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; +import org.apache.hadoop.hive.llap.io.encoded.OrcEncodedDataReader; +import org.apache.hadoop.hive.llap.io.metadata.OrcMetadataCache; +import org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics; +import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; +import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader; +import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; +import org.apache.orc.OrcConf; +import org.apache.orc.TypeDescription; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** + * OrcAcidColumnVectorProducer produces a ReadPipeline of ORC ACID data for LLAP. 
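+ * The producer is wired up in LlapIoImpl (above): when OrcInputFormat.isAcidRead(conf) reports
+ * an ACID read, this producer is chosen instead of the plain ORC one. Its createReadPipeline()
+ * then uses OrcAcidEncodedDataConsumer for ACID splits (as decided by
+ * VectorizedOrcAcidRowBatchReader.isAcid(job, split)) and falls back to the ordinary
+ * OrcEncodedDataConsumer for non-ACID splits.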
+ */ +public class OrcAcidColumnVectorProducer implements ColumnVectorProducer { + + private final OrcMetadataCache metadataCache; + private final LowLevelCache baseCache; + private final BufferUsageManager bufferManager; + private final Configuration conf; + private boolean _skipCorrupt; // TODO: get rid of this + private LlapDaemonCacheMetrics cacheMetrics; + private LlapDaemonIOMetrics ioMetrics; + + public OrcAcidColumnVectorProducer( + OrcMetadataCache metadataCache, LowLevelCache baseCache, BufferUsageManager bufferManager, + Configuration conf, LlapDaemonCacheMetrics cacheMetrics, LlapDaemonIOMetrics ioMetrics) { + LlapIoImpl.LOG.info("Initializing ORC ACID column vector producer"); + + this.metadataCache = metadataCache; + this.baseCache = baseCache; + this.bufferManager = bufferManager; + this.conf = conf; + this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf); + this.cacheMetrics = cacheMetrics; + this.ioMetrics = ioMetrics; + } + + @Override + public ReadPipeline createReadPipeline( + Consumer consumer, FileSplit split, List columnIds, + SearchArgument sarg, String[] columnNames, QueryFragmentCounters counters, + TypeDescription readerSchema, InputFormat unused0, Deserializer unused1, + Reporter reporter, JobConf job, Map unused2) throws IOException { + cacheMetrics.incrCacheReadRequests(); + OrcEncodedDataConsumer edc; + if (VectorizedOrcAcidRowBatchReader.isAcid(job, split)) { + // If the split is ACID, then use ORC ACID consumer + edc = new OrcAcidEncodedDataConsumer(consumer, columnIds.size(), + _skipCorrupt, counters, ioMetrics, job, split); + } else { + // If the split is non-ACID, then use ORC consumer + edc = new OrcEncodedDataConsumer( + consumer, columnIds.size(), _skipCorrupt, counters, ioMetrics); + } + // Note: we use global conf here and ignore JobConf. + OrcEncodedDataReader reader = new OrcEncodedDataReader(baseCache, bufferManager, + metadataCache, conf, split, columnIds, sarg, columnNames, edc, counters, readerSchema); + edc.init(reader, reader); + return edc; + } +} diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidEncodedDataConsumer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidEncodedDataConsumer.java new file mode 100644 index 0000000..90dbc7c --- /dev/null +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcAcidEncodedDataConsumer.java @@ -0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.llap.io.decode; + +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; +import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch; +import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger; +import org.apache.hadoop.hive.ql.io.orc.OrcSplit; +import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader; +import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer; +import org.apache.hadoop.hive.ql.io.orc.encoded.Reader; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; + +import java.io.IOException; +import java.util.BitSet; + +import static org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.findRecordsWithInvalidTransactionIds; + +/** + * OrcAcidEncodeDataConsumer consumers data after merging the base data with ACID delta data. + */ +public class OrcAcidEncodedDataConsumer extends OrcEncodedDataConsumer implements ReadPipeline { + private final InnerConsumer innerConsumer = new InnerConsumer(); + private final JobConf conf; + private final FileSplit split; + + public OrcAcidEncodedDataConsumer( + Consumer consumer, int size, boolean skipCorrupt, + QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics, + JobConf conf, FileSplit split) throws IOException { + super(consumer, size, skipCorrupt, counters, ioMetrics); + this.split = split; + this.conf = conf; + } + + @Override + protected void decodeBatch(Reader.OrcEncodedColumnBatch batch, + Consumer downstreamConsumer) { + innerConsumer.downstreamConsumer = downstreamConsumer; + super.decodeBatch(batch, innerConsumer); + } + + private class InnerConsumer implements Consumer { + Consumer downstreamConsumer; + VectorizedOrcAcidRowBatchReader.DeleteEventRegistry deleteEventRegistry; + + InnerConsumer() { + // Clone readerOptions for deleteEvents. + Reader.Options readerOptions = OrcInputFormat.createOptionsForReader(conf); + readerOptions = OrcRawRecordMerger.createEventOptions(readerOptions); + Reader.Options deleteEventReaderOptions = readerOptions.clone(); + // Set the range on the deleteEventReaderOptions to 0 to INTEGER_MAX because + // we always want to read all the delete delta files. + deleteEventReaderOptions.range(0, Long.MAX_VALUE); + // Disable SARGs for deleteEventReaders, as SARGs have no meaning. + deleteEventReaderOptions.searchArgument(null, null); + OrcSplit orcSplit = (OrcSplit) split; + + try { + try { + // See if we can load all the delete events from all the delete deltas in memory... + deleteEventRegistry = new VectorizedOrcAcidRowBatchReader.ColumnizedDeleteEventRegistry( + conf, orcSplit, deleteEventReaderOptions); + } catch (VectorizedOrcAcidRowBatchReader.DeleteEventsOverflowMemoryException e) { + // If not, then create a set of hanging readers that do sort-merge to find the next smallest + // delete event on-demand. Caps the memory consumption to (some_const * no. of readers). 
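+ // This is the same fallback strategy VectorizedOrcAcidRowBatchReader itself applies when
+ // ColumnizedDeleteEventRegistry signals DeleteEventsOverflowMemoryException.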
+ deleteEventRegistry = new VectorizedOrcAcidRowBatchReader.SortMergedDeleteEventRegistry( + conf, orcSplit, deleteEventReaderOptions); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void consumeData(ColumnVectorBatch data) { + BitSet selectedBitSet = new BitSet(data.size); + + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + ValidTxnList validTxnList = (txnString == null) ? new ValidReadTxnList() : new ValidReadTxnList(txnString); + + // Case 1- find rows which belong to transactions that are not valid. + findRecordsWithInvalidTransactionIds(data.cols, data.size, selectedBitSet, validTxnList); + + // Case 2- find rows which have been deleted. + try { + deleteEventRegistry.findDeletedRecords(data.cols, data.size, selectedBitSet); + } catch (IOException e) { + throw new RuntimeException(e); + } + + // Select only not deleted ones + int cardinality = selectedBitSet.cardinality(); + if (cardinality != data.size) { + data.size = cardinality; + for (int setBitIndex = selectedBitSet.nextSetBit(0), selectedItr = 0; + setBitIndex >= 0; + setBitIndex = selectedBitSet.nextSetBit(setBitIndex+1), ++selectedItr) { + for (ColumnVector columnVector : data.cols) { + columnVector.setElement(selectedItr, setBitIndex, columnVector); + } + } + } + + downstreamConsumer.consumeData(data); + } + + @Override + public void setDone() { + downstreamConsumer.setDone(); + } + + @Override + public void setError(Throwable t) { + downstreamConsumer.setError(t); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index aaf2399..793dd29 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -37,85 +37,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.udf.SettableUDF; -import org.apache.hadoop.hive.ql.udf.UDAFPercentile; -import org.apache.hadoop.hive.ql.udf.UDFAcos; -import org.apache.hadoop.hive.ql.udf.UDFAscii; -import org.apache.hadoop.hive.ql.udf.UDFAsin; -import org.apache.hadoop.hive.ql.udf.UDFAtan; -import org.apache.hadoop.hive.ql.udf.UDFBase64; -import org.apache.hadoop.hive.ql.udf.UDFBin; -import org.apache.hadoop.hive.ql.udf.UDFChr; -import org.apache.hadoop.hive.ql.udf.UDFConv; -import org.apache.hadoop.hive.ql.udf.UDFCos; -import org.apache.hadoop.hive.ql.udf.UDFCrc32; -import org.apache.hadoop.hive.ql.udf.UDFDateFloorDay; -import org.apache.hadoop.hive.ql.udf.UDFDateFloorHour; -import org.apache.hadoop.hive.ql.udf.UDFDateFloorMinute; -import org.apache.hadoop.hive.ql.udf.UDFDateFloorMonth; -import org.apache.hadoop.hive.ql.udf.UDFDateFloorQuarter; -import org.apache.hadoop.hive.ql.udf.UDFDateFloorSecond; -import org.apache.hadoop.hive.ql.udf.UDFDateFloorWeek; -import org.apache.hadoop.hive.ql.udf.UDFDateFloorYear; -import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; -import org.apache.hadoop.hive.ql.udf.UDFDayOfWeek; -import org.apache.hadoop.hive.ql.udf.UDFDegrees; -import org.apache.hadoop.hive.ql.udf.UDFE; -import org.apache.hadoop.hive.ql.udf.UDFExp; -import org.apache.hadoop.hive.ql.udf.UDFFindInSet; -import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime; -import org.apache.hadoop.hive.ql.udf.UDFHex; -import org.apache.hadoop.hive.ql.udf.UDFHour; -import org.apache.hadoop.hive.ql.udf.UDFJson; -import 
org.apache.hadoop.hive.ql.udf.UDFLength; -import org.apache.hadoop.hive.ql.udf.UDFLike; -import org.apache.hadoop.hive.ql.udf.UDFLn; -import org.apache.hadoop.hive.ql.udf.UDFLog; -import org.apache.hadoop.hive.ql.udf.UDFLog10; -import org.apache.hadoop.hive.ql.udf.UDFLog2; -import org.apache.hadoop.hive.ql.udf.UDFMd5; -import org.apache.hadoop.hive.ql.udf.UDFMinute; -import org.apache.hadoop.hive.ql.udf.UDFMonth; -import org.apache.hadoop.hive.ql.udf.UDFOPBitAnd; -import org.apache.hadoop.hive.ql.udf.UDFOPBitNot; -import org.apache.hadoop.hive.ql.udf.UDFOPBitOr; -import org.apache.hadoop.hive.ql.udf.UDFOPBitShiftLeft; -import org.apache.hadoop.hive.ql.udf.UDFOPBitShiftRight; -import org.apache.hadoop.hive.ql.udf.UDFOPBitShiftRightUnsigned; -import org.apache.hadoop.hive.ql.udf.UDFOPBitXor; -import org.apache.hadoop.hive.ql.udf.UDFOPLongDivide; -import org.apache.hadoop.hive.ql.udf.UDFPI; -import org.apache.hadoop.hive.ql.udf.UDFParseUrl; -import org.apache.hadoop.hive.ql.udf.UDFRadians; -import org.apache.hadoop.hive.ql.udf.UDFRand; -import org.apache.hadoop.hive.ql.udf.UDFRegExpExtract; -import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace; -import org.apache.hadoop.hive.ql.udf.UDFRepeat; -import org.apache.hadoop.hive.ql.udf.UDFReplace; -import org.apache.hadoop.hive.ql.udf.UDFReverse; -import org.apache.hadoop.hive.ql.udf.UDFSecond; -import org.apache.hadoop.hive.ql.udf.UDFSha1; -import org.apache.hadoop.hive.ql.udf.UDFSign; -import org.apache.hadoop.hive.ql.udf.UDFSin; -import org.apache.hadoop.hive.ql.udf.UDFSpace; -import org.apache.hadoop.hive.ql.udf.UDFSqrt; -import org.apache.hadoop.hive.ql.udf.UDFSubstr; -import org.apache.hadoop.hive.ql.udf.UDFTan; -import org.apache.hadoop.hive.ql.udf.UDFToBoolean; -import org.apache.hadoop.hive.ql.udf.UDFToByte; -import org.apache.hadoop.hive.ql.udf.UDFToDouble; -import org.apache.hadoop.hive.ql.udf.UDFToFloat; -import org.apache.hadoop.hive.ql.udf.UDFToInteger; -import org.apache.hadoop.hive.ql.udf.UDFToLong; -import org.apache.hadoop.hive.ql.udf.UDFToShort; -import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.ql.udf.UDFType; -import org.apache.hadoop.hive.ql.udf.UDFUUID; -import org.apache.hadoop.hive.ql.udf.UDFUnbase64; -import org.apache.hadoop.hive.ql.udf.UDFUnhex; -import org.apache.hadoop.hive.ql.udf.UDFVersion; -import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; -import org.apache.hadoop.hive.ql.udf.UDFYear; +import org.apache.hadoop.hive.ql.udf.*; +import org.apache.hadoop.hive.ql.udf.GenericUDFCharacterLength; import org.apache.hadoop.hive.ql.udf.generic.*; import org.apache.hadoop.hive.ql.udf.ptf.MatchPath.MatchPathResolver; import org.apache.hadoop.hive.ql.udf.ptf.Noop.NoopResolver; @@ -262,7 +185,10 @@ system.registerGenericUDF("trim", GenericUDFTrim.class); system.registerGenericUDF("ltrim", GenericUDFLTrim.class); system.registerGenericUDF("rtrim", GenericUDFRTrim.class); - system.registerUDF("length", UDFLength.class, false); + system.registerGenericUDF("length", UDFLength.class); + system.registerGenericUDF("character_length", GenericUDFCharacterLength.class); + system.registerGenericUDF("char_length", GenericUDFCharacterLength.class); + system.registerGenericUDF("octet_length", GenericUDFOctetLength.class); system.registerUDF("reverse", UDFReverse.class, false); system.registerGenericUDF("field", GenericUDFField.class); system.registerUDF("find_in_set", UDFFindInSet.class, false); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java new file mode 100644 index 0000000..3d85772 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +public class OctetLength extends VectorExpression { + private static final long serialVersionUID = 1L; + private transient int colNum; + private transient int outputColumn; + + public OctetLength(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public OctetLength() { + super(); + } + + // Calculate the length of the UTF-8 strings in input vector and place results in output vector. + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + int n = batch.size; + int [] length = inputColVector.length; + long[] resultLen = outV.vector; + + if (n == 0) { + //Nothing to do + return; + } + + if (inputColVector.noNulls) { + outV.noNulls = true; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + resultLen[0] = length[0]; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + resultLen[i] = length[i]; + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + resultLen[i] = length[i]; + } + outV.isRepeating = false; + } + } else { + + /* + * Handle case with nulls. Don't do function if the value is null, to save time, + * because calling the function can be expensive. 
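+ * For octet_length the per-row work is just a copy: BytesColumnVector already records each
+ * value's byte length in its length[] array, so length[i] is copied into the long output
+ * vector for every selected, non-null row.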
+ */ + outV.noNulls = false; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inputColVector.isNull[0]; + if (!inputColVector.isNull[0]) { + resultLen[0] = length[0]; + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!inputColVector.isNull[i]) { + resultLen[i] = length[i]; + } + outV.isNull[i] = inputColVector.isNull[i]; + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + if (!inputColVector.isNull[i]) { + resultLen[i] = length[i]; + } + outV.isNull[i] = inputColVector.isNull[i]; + } + outV.isRepeating = false; + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "Long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public String vectorExpressionParameters() { + return "col " + colNum; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 369584b..c6dd270 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -210,6 +210,13 @@ public boolean isAcidRead(Configuration conf, InputSplit inputSplit) { return HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); } + public boolean isAcidRead(Configuration conf) { + /* + * Fallback for the case when OrcSplit flags do not contain hasBase and deltas + */ + return HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); + } + private static class OrcRecordReader implements org.apache.hadoop.mapred.RecordReader, StatsProvidingRecordReader { @@ -1844,8 +1851,7 @@ private static void scheduleSplits(ETLSplitStrategy splitStrategy, Context conte reporter.setStatus(inputSplit.toString()); - boolean isFastVectorizedReaderAvailable = - VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, inputSplit); + boolean isFastVectorizedReaderAvailable = VectorizedOrcAcidRowBatchReader.isAcid(conf, inputSplit); if (vectorMode && isFastVectorizedReaderAvailable) { // Faster vectorized ACID row batch reader is available that avoids row-by-row stitching. @@ -2030,7 +2036,7 @@ static Path findOriginalBucket(FileSystem fs, directory); } - static Reader.Options createOptionsForReader(Configuration conf) { + public static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? 
*/ diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java index 95b8806..de0f0bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java @@ -399,7 +399,7 @@ private void discoverKeyBounds(Reader reader, * @param options options for the row reader * @return a cloned options object that is modified for the event reader */ - static Reader.Options createEventOptions(Reader.Options options) { + public static Reader.Options createEventOptions(Reader.Options options) { Reader.Options result = options.clone(); result.range(options.getOffset(), Long.MAX_VALUE); result.include(options.getInclude()); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index d61b24b..eb6231f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -218,7 +218,7 @@ public long getColumnarProjectionSize() { @Override public boolean canUseLlapIo() { - return isOriginal && (deltas == null || deltas.isEmpty()); + return isOriginal; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index 75c7680..3eba09b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.util.Arrays; import java.util.BitSet; -import java.util.List; import java.util.Map.Entry; import java.util.TreeMap; @@ -32,6 +31,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -42,15 +42,13 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; -import org.apache.orc.OrcProto; -import org.apache.orc.OrcUtils; -import org.apache.orc.TypeDescription; + +import com.google.common.annotations.VisibleForTesting; import org.apache.orc.impl.AcidStats; import org.apache.orc.impl.OrcAcidUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; /** * A fast vectorized batch reader class for ACID when split-update behavior is enabled. * When split-update is turned on, row-by-row stitching could be avoided to create the final @@ -147,7 +145,7 @@ public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf, * @param inputSplit * @return true if it is possible, else false. */ - public static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf, InputSplit inputSplit) { + public static boolean isAcid(JobConf conf, InputSplit inputSplit) { if (!(inputSplit instanceof OrcSplit)) { return false; // must be an instance of OrcSplit. } @@ -163,7 +161,7 @@ public static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf, return false; // no split-update or possibly reading originals! 
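    // In short: the split must be an OrcSplit, the table must use the default (split-update)
    // ACID operational properties rather than the legacy ones, and the split must not be reading
    // "original" (pre-ACID) files; see TestVectorizedOrcAcidRowBatchReader below.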
} - private static Path[] getDeleteDeltaDirsFromSplit(OrcSplit orcSplit) throws IOException { + public static Path[] getDeleteDeltaDirsFromSplit(OrcSplit orcSplit) throws IOException { Path path = orcSplit.getPath(); Path root; if (orcSplit.hasBase()) { @@ -221,10 +219,12 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti } // Case 1- find rows which belong to transactions that are not valid. - findRecordsWithInvalidTransactionIds(vectorizedRowBatchBase, selectedBitSet); + findRecordsWithInvalidTransactionIds( + vectorizedRowBatchBase.cols, vectorizedRowBatchBase.size, selectedBitSet, validTxnList); // Case 2- find rows which have been deleted. - this.deleteEventRegistry.findDeletedRecords(vectorizedRowBatchBase, selectedBitSet); + this.deleteEventRegistry.findDeletedRecords( + vectorizedRowBatchBase.cols, vectorizedRowBatchBase.size, selectedBitSet); if (selectedBitSet.cardinality() == vectorizedRowBatchBase.size) { // None of the cases above matched and everything is selected. Hence, we will use the @@ -257,19 +257,20 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti return true; } - private void findRecordsWithInvalidTransactionIds(VectorizedRowBatch batch, BitSet selectedBitSet) { - if (batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION].isRepeating) { + public static void findRecordsWithInvalidTransactionIds( + ColumnVector[] cols, int size, BitSet selectedBitSet, ValidTxnList validTxnList) { + if (cols[OrcRecordUpdater.CURRENT_TRANSACTION].isRepeating) { // When we have repeating values, we can unset the whole bitset at once // if the repeating value is not a valid transaction. long currentTransactionIdForBatch = ((LongColumnVector) - batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector[0]; + cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector[0]; if (!validTxnList.isTxnValid(currentTransactionIdForBatch)) { - selectedBitSet.clear(0, batch.size); + selectedBitSet.clear(0, size); } return; } long[] currentTransactionVector = - ((LongColumnVector) batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector; + ((LongColumnVector) cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector; // Loop through the bits that are set to true and mark those rows as false, if their // current transactions are not valid. for (int setBitIndex = selectedBitSet.nextSetBit(0); @@ -278,7 +279,7 @@ private void findRecordsWithInvalidTransactionIds(VectorizedRowBatch batch, BitS if (!validTxnList.isTxnValid(currentTransactionVector[setBitIndex])) { selectedBitSet.clear(setBitIndex); } - } + } } @Override @@ -321,22 +322,24 @@ DeleteEventRegistry getDeleteEventRegistry() { * will read the delete delta files and will create their own internal * data structures to maintain record ids of the records that got deleted. */ - static interface DeleteEventRegistry { + public interface DeleteEventRegistry { /** * Modifies the passed bitset to indicate which of the rows in the batch * have been deleted. Assumes that the batch.size is equal to bitset size. - * @param batch + * @param cols + * @param size * @param selectedBitSet * @throws IOException */ - public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) throws IOException; + public void findDeletedRecords( + ColumnVector[] cols, int size, BitSet selectedBitSet) throws IOException; /** * The close() method can be called externally to signal the implementing classes * to free up resources. 
* @throws IOException */ - public void close() throws IOException; + void close() throws IOException; } /** @@ -346,7 +349,7 @@ DeleteEventRegistry getDeleteEventRegistry() { * amount of memory usage, given the number of delete delta files. Therefore, this * implementation will be picked up when the memory pressure is high. */ - static class SortMergedDeleteEventRegistry implements DeleteEventRegistry { + public static class SortMergedDeleteEventRegistry implements DeleteEventRegistry { private OrcRawRecordMerger deleteRecords; private OrcRawRecordMerger.ReaderKey deleteRecordKey; private OrcStruct deleteRecordValue; @@ -375,29 +378,29 @@ public SortMergedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, Reader.Opt } @Override - public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) + public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBitSet) throws IOException { if (!isDeleteRecordAvailable) { return; } long[] originalTransaction = - batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; + cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; long[] bucket = - batch.cols[OrcRecordUpdater.BUCKET].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.BUCKET]).vector; + cols[OrcRecordUpdater.BUCKET].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector; long[] rowId = - batch.cols[OrcRecordUpdater.ROW_ID].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector; + cols[OrcRecordUpdater.ROW_ID].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector; // The following repeatedX values will be set, if any of the columns are repeating. long repeatedOriginalTransaction = (originalTransaction != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; long repeatedBucket = (bucket != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.BUCKET]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector[0]; long repeatedRowId = (rowId != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector[0]; // Get the first valid row in the batch still available. @@ -412,7 +415,7 @@ public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) rowId != null ? (int) rowId[firstValidIndex] : repeatedRowId); // Get the last valid row in the batch still available. - int lastValidIndex = selectedBitSet.previousSetBit(batch.size - 1); + int lastValidIndex = selectedBitSet.previousSetBit(size - 1); RecordIdentifier lastRecordIdInBatch = new RecordIdentifier( originalTransaction != null ? originalTransaction[lastValidIndex] : repeatedOriginalTransaction, @@ -482,7 +485,7 @@ public void close() throws IOException { * heuristic that prevents creation of an instance of this class if the memory pressure is high. * The SortMergedDeleteEventRegistry is then the fallback method for such scenarios. */ - static class ColumnizedDeleteEventRegistry implements DeleteEventRegistry { + public static class ColumnizedDeleteEventRegistry implements DeleteEventRegistry { /** * A simple wrapper class to hold the (otid, rowId) pair. 
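   * (otid = original transaction id, rowId = the ACID row id); the registry keeps these sorted
   * so isDeleted(otid, rowId) can look a delete event up quickly.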
*/ @@ -626,8 +629,9 @@ public int compareTo(CompressedOtid other) { private CompressedOtid compressedOtids[]; private ValidTxnList validTxnList; - public ColumnizedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, - Reader.Options readerOptions) throws IOException, DeleteEventsOverflowMemoryException { + public ColumnizedDeleteEventRegistry( + JobConf conf, OrcSplit orcSplit, Reader.Options readerOptions) + throws IOException, DeleteEventsOverflowMemoryException { int bucket = AcidUtils.parseBaseOrDeltaBucketFilename(orcSplit.getPath(), conf).getBucket(); String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); this.validTxnList = (txnString == null) ? new ValidReadTxnList() : new ValidReadTxnList(txnString); @@ -775,7 +779,7 @@ private boolean isDeleted(long otid, long rowId) { } @Override - public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) + public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBitSet) throws IOException { if (rowIds == null || compressedOtids == null) { return; @@ -784,13 +788,13 @@ public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) // check if it is deleted or not. long[] originalTransactionVector = - batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; + cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector; long repeatedOriginalTransaction = (originalTransactionVector != null) ? -1 - : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; + : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0]; long[] rowIdVector = - ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector; + ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector; for (int setBitIndex = selectedBitSet.nextSetBit(0); setBitIndex >= 0; @@ -801,7 +805,7 @@ public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) if (isDeleted(otid, rowId)) { selectedBitSet.clear(setBitIndex); } - } + } } @Override @@ -816,7 +820,7 @@ public void close() throws IOException { } } - static class DeleteEventsOverflowMemoryException extends Exception { + public static class DeleteEventsOverflowMemoryException extends Exception { private static final long serialVersionUID = 1L; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index fadbc20..bac2a82 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -37,16 +37,14 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.calcite.util.Pair; -import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.hadoop.hive.ql.udf.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; -import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.spark.SparkTask; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ 
-70,10 +68,8 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; -import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.HiveVectorAdaptorUsageMode; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType; @@ -107,16 +103,13 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; -import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.LimitDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; @@ -130,7 +123,6 @@ import org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorLimitDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; -import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion; import org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; @@ -149,62 +141,17 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; -import org.apache.hadoop.hive.ql.udf.UDFAcos; -import org.apache.hadoop.hive.ql.udf.UDFAsin; -import org.apache.hadoop.hive.ql.udf.UDFAtan; -import org.apache.hadoop.hive.ql.udf.UDFBin; -import org.apache.hadoop.hive.ql.udf.UDFConv; -import org.apache.hadoop.hive.ql.udf.UDFCos; -import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; -import org.apache.hadoop.hive.ql.udf.UDFDayOfWeek; -import org.apache.hadoop.hive.ql.udf.UDFDegrees; -import org.apache.hadoop.hive.ql.udf.UDFExp; -import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime; -import org.apache.hadoop.hive.ql.udf.UDFHex; -import org.apache.hadoop.hive.ql.udf.UDFHour; -import org.apache.hadoop.hive.ql.udf.UDFLength; -import org.apache.hadoop.hive.ql.udf.UDFLike; -import org.apache.hadoop.hive.ql.udf.UDFLn; -import org.apache.hadoop.hive.ql.udf.UDFLog; -import org.apache.hadoop.hive.ql.udf.UDFLog10; -import 
org.apache.hadoop.hive.ql.udf.UDFLog2; -import org.apache.hadoop.hive.ql.udf.UDFMinute; -import org.apache.hadoop.hive.ql.udf.UDFMonth; -import org.apache.hadoop.hive.ql.udf.UDFRadians; -import org.apache.hadoop.hive.ql.udf.UDFRand; -import org.apache.hadoop.hive.ql.udf.UDFRegExpExtract; -import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace; -import org.apache.hadoop.hive.ql.udf.UDFSecond; -import org.apache.hadoop.hive.ql.udf.UDFSign; -import org.apache.hadoop.hive.ql.udf.UDFSin; -import org.apache.hadoop.hive.ql.udf.UDFSqrt; -import org.apache.hadoop.hive.ql.udf.UDFSubstr; -import org.apache.hadoop.hive.ql.udf.UDFTan; -import org.apache.hadoop.hive.ql.udf.UDFToBoolean; -import org.apache.hadoop.hive.ql.udf.UDFToByte; -import org.apache.hadoop.hive.ql.udf.UDFToDouble; -import org.apache.hadoop.hive.ql.udf.UDFToFloat; -import org.apache.hadoop.hive.ql.udf.UDFToInteger; -import org.apache.hadoop.hive.ql.udf.UDFToLong; -import org.apache.hadoop.hive.ql.udf.UDFToShort; -import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; -import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.*; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.NullStructSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -218,8 +165,6 @@ import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hive.common.util.AnnotationUtils; -import org.apache.hive.common.util.HiveStringUtils; -import org.apache.hive.common.util.ReflectionUtil; import com.google.common.base.Preconditions; @@ -335,6 +280,8 @@ public Vectorizer() { supportedGenericUDFs.add(GenericUDFOPAnd.class); supportedGenericUDFs.add(GenericUDFOPEqual.class); supportedGenericUDFs.add(UDFLength.class); + supportedGenericUDFs.add(GenericUDFCharacterLength.class); + supportedGenericUDFs.add(GenericUDFOctetLength.class); supportedGenericUDFs.add(UDFYear.class); supportedGenericUDFs.add(UDFMonth.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index d4bdd96..2cc5987 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -254,7 +254,7 @@ public void deriveExplainAttributes() { } public void deriveLlap(Configuration conf, boolean isExecDriver) { - boolean hasLlap = false, hasNonLlap = false, hasAcid = false; + boolean hasLlap = false, hasNonLlap = false; // Assume the IO is enabled on the daemon by default. We cannot reasonably check it here. 
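    // With this patch, ACID-transactional inputs go through LLAP IO like any other ORC input,
    // so there is no separate hasAcid bucket (and no "may be used (ACID table)" marker) any more.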
boolean isLlapOn = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENABLED, llapMode); boolean canWrapAny = false, doCheckIfs = false; @@ -274,12 +274,7 @@ public void deriveLlap(Configuration conf, boolean isExecDriver) { boolean isUsingLlapIo = HiveInputFormat.canWrapForLlap( part.getInputFileFormatClass(), doCheckIfs); if (isUsingLlapIo) { - if (part.getTableDesc() != null && - AcidUtils.isTablePropertyTransactional(part.getTableDesc().getProperties())) { - hasAcid = true; - } else { - hasLlap = true; - } + hasLlap = true; } else { hasNonLlap = true; } @@ -292,7 +287,7 @@ public void deriveLlap(Configuration conf, boolean isExecDriver) { } llapIoDesc = deriveLlapIoDescString( - isLlapOn, canWrapAny, hasPathToPartInfo, hasLlap, hasNonLlap, hasAcid); + isLlapOn, canWrapAny, hasPathToPartInfo, hasLlap, hasNonLlap); } private boolean checkVectorizerSupportedTypes(boolean hasLlap) { @@ -317,11 +312,10 @@ private boolean checkVectorizerSupportedTypes(boolean hasLlap) { } private static String deriveLlapIoDescString(boolean isLlapOn, boolean canWrapAny, - boolean hasPathToPartInfo, boolean hasLlap, boolean hasNonLlap, boolean hasAcid) { + boolean hasPathToPartInfo, boolean hasLlap, boolean hasNonLlap) { if (!isLlapOn) return null; // LLAP IO is off, don't output. if (!canWrapAny) return "no inputs"; // Cannot use with input formats. if (!hasPathToPartInfo) return "unknown"; // No information to judge. - if (hasAcid) return "may be used (ACID table)"; return (hasLlap ? (hasNonLlap ? "some inputs" : "all inputs") : "no inputs"); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFCharacterLength.java ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFCharacterLength.java new file mode 100644 index 0000000..48c3e3f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFCharacterLength.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLength; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IntWritable; + +@Description(name = "character_length,char_length", + value = "_FUNC_(str | binary) - Returns the number of characters in str or binary data", + extended = "Example:\n" + + " > SELECT _FUNC_('안녕하세요') FROM src LIMIT 1;\n" + " 5") +@VectorizedExpressions({StringLength.class}) +public class GenericUDFCharacterLength extends GenericUDF { + private final IntWritable result = new IntWritable(); + private transient PrimitiveObjectInspector argumentOI; + private transient PrimitiveObjectInspectorConverter.StringConverter stringConverter; + private transient boolean isInputString; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "CHARACTER_LENGTH requires 1 argument, got " + arguments.length); + } + + if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentException( + "CHARACTER_LENGTH only takes primitive types, got " + argumentOI.getTypeName()); + } + argumentOI = (PrimitiveObjectInspector) arguments[0]; + + stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI); + PrimitiveObjectInspector.PrimitiveCategory inputType = argumentOI.getPrimitiveCategory(); + ObjectInspector outputOI = null; + switch (inputType) { + case CHAR: + case VARCHAR: + case STRING: + isInputString = true; + break; + + case BINARY: + isInputString = false; + break; + + default: + throw new UDFArgumentException( + " CHARACTER_LENGTH() only takes STRING/CHAR/VARCHAR/BINARY types as first argument, got " + + inputType); + } + + outputOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + byte[] data = null; + if (isInputString) { + String val = null; + if (arguments[0] != null) { + val = (String) stringConverter.convert(arguments[0].get()); + } + if (val == null) { + return null; + } + + data = val.getBytes(); + } else { + BytesWritable val = null; + if (arguments[0] != null) { + val = (BytesWritable) arguments[0].get(); + } + if (val == null) { + return null; + } + + data = val.getBytes(); + } + + int len = 0; + for (int i = 0; i < data.length; i++) { + if (GenericUDFUtils.isUtfStartByte(data[i])) { + len++; + } + } + result.set(len); + return result; + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString("character_length", children); + } +} diff --git 
ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFOctetLength.java ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFOctetLength.java new file mode 100644 index 0000000..a5a37ee --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFOctetLength.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.OctetLength; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IntWritable; + +@Description(name = "octet_length", + value = "_FUNC_(str | binary) - Returns the number of bytes in str or binary data", + extended = "Example:\n" + + " > SELECT _FUNC_('안녕하세요') FROM src LIMIT 1;\n" + " 15") +@VectorizedExpressions({OctetLength.class}) +public class GenericUDFOctetLength extends GenericUDF { + private final IntWritable result = new IntWritable(); + private transient PrimitiveObjectInspector argumentOI; + private transient PrimitiveObjectInspectorConverter.StringConverter stringConverter; + private transient boolean isInputString; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "OCTET_LENGTH requires 1 argument, got " + arguments.length); + } + + if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentException( + "OCTET_LENGTH only takes primitive types, got " + argumentOI.getTypeName()); + } + argumentOI = (PrimitiveObjectInspector) arguments[0]; + + stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI); + PrimitiveObjectInspector.PrimitiveCategory inputType = argumentOI.getPrimitiveCategory(); + ObjectInspector outputOI = null; + switch (inputType) { + case CHAR: + case VARCHAR: + case STRING: + isInputString = true; + break; + + case BINARY: + isInputString = false; + break; + + default: + throw new UDFArgumentException( + " OCTET_LENGTH() only takes STRING/CHAR/VARCHAR/BINARY types as first 
argument, got " + + inputType); + } + + outputOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + return outputOI; + } + + @Override + public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException { + byte[] data = null; + if (isInputString) { + String val = null; + if (arguments[0] != null) { + val = (String) stringConverter.convert(arguments[0].get()); + } + if (val == null) { + return null; + } + + data = val.getBytes(); + } else { + BytesWritable val = null; + if (arguments[0] != null) { + val = (BytesWritable) arguments[0].get(); + } + if (val == null) { + return null; + } + + data = val.getBytes(); + } + + result.set(data.length); + return result; + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString("octet_length", children); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java index 4bdcb0e..24a4601 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java @@ -18,13 +18,19 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLength; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.Text; /** * UDFLength. 
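For a quick side-by-side sanity check of the three functions that FunctionRegistry now registers as length, character_length/char_length and octet_length, a minimal driver along these lines can be used (illustrative only: the class name and the direct-invocation pattern are ours, mirroring what the q-tests do through SQL, and a UTF-8 default charset is assumed, as in the @Description examples):

import org.apache.hadoop.hive.ql.udf.GenericUDFCharacterLength;
import org.apache.hadoop.hive.ql.udf.GenericUDFOctetLength;
import org.apache.hadoop.hive.ql.udf.UDFLength;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class LengthUdfSketch {
  // Initialize the UDF with a single string argument and evaluate it on one value.
  private static int eval(GenericUDF udf, String s) throws Exception {
    udf.initialize(new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector });
    return ((IntWritable) udf.evaluate(new GenericUDF.DeferredObject[] {
        new GenericUDF.DeferredJavaObject(s) })).get();
  }

  public static void main(String[] args) throws Exception {
    System.out.println(eval(new UDFLength(), "Facebook"));                 // 8 characters
    System.out.println(eval(new GenericUDFCharacterLength(), "안녕하세요")); // 5 characters
    System.out.println(eval(new GenericUDFOctetLength(), "안녕하세요"));     // 15 bytes in UTF-8
  }
}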
@@ -35,32 +41,84 @@ extended = "Example:\n" + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 8") @VectorizedExpressions({StringLength.class}) -public class UDFLength extends UDF { +public class UDFLength extends GenericUDF { private final IntWritable result = new IntWritable(); + private transient PrimitiveObjectInspector argumentOI; + private transient PrimitiveObjectInspectorConverter.StringConverter stringConverter; + private transient boolean isInputString; - public IntWritable evaluate(Text s) { - if (s == null) { - return null; + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "LENGTH requires 1 argument, got " + arguments.length); + } + + if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentException( + "LENGTH only takes primitive types, got " + argumentOI.getTypeName()); + } + argumentOI = (PrimitiveObjectInspector) arguments[0]; + + stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI); + PrimitiveObjectInspector.PrimitiveCategory inputType = argumentOI.getPrimitiveCategory(); + ObjectInspector outputOI = null; + switch (inputType) { + case CHAR: + case VARCHAR: + case STRING: + isInputString = true; + break; + + case BINARY: + isInputString = false; + break; + + default: + throw new UDFArgumentException( + " LENGTH() only takes STRING/CHAR/VARCHAR/BINARY types as first argument, got " + + inputType); + } + + outputOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + byte[] data = null; + if (isInputString) { + String val = null; + if (arguments[0] != null) { + val = (String) stringConverter.convert(arguments[0].get()); + } + if (val == null) { + return null; + } + + data = val.getBytes(); + } else { + BytesWritable val = null; + if (arguments[0] != null) { + val = (BytesWritable) arguments[0].get(); + } + if (val == null) { + return null; + } } - byte[] data = s.getBytes(); int len = 0; - for (int i = 0; i < s.getLength(); i++) { + for (int i = 0; i < data.length; i++) { if (GenericUDFUtils.isUtfStartByte(data[i])) { len++; } } - result.set(len); return result; } - public IntWritable evaluate(BytesWritable bw){ - if (bw == null){ - return null; - -} - result.set(bw.getLength()); - return result; + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString("length", children); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java index 6bf1312..1957f9d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java @@ -247,17 +247,17 @@ public void testCanCreateVectorizedAcidRowBatchReaderOnSplit() throws Exception conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getLegacy().toInt()); // Test false when trying to create a vectorized ACID row batch reader for a legacy table. 
- assertFalse(VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, mockSplit)); + assertFalse(VectorizedOrcAcidRowBatchReader.isAcid(conf, mockSplit)); conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt()); Mockito.when(mockSplit.isOriginal()).thenReturn(true); // Test false when trying to create a vectorized ACID row batch reader when reading originals. - assertFalse(VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, mockSplit)); + assertFalse(VectorizedOrcAcidRowBatchReader.isAcid(conf, mockSplit)); // A positive test case. Mockito.when(mockSplit.isOriginal()).thenReturn(false); - assertTrue(VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, mockSplit)); + assertTrue(VectorizedOrcAcidRowBatchReader.isAcid(conf, mockSplit)); } } diff --git ql/src/test/queries/clientpositive/llap_acid.q ql/src/test/queries/clientpositive/llap_acid.q index 6bd216a..41d86af 100644 --- ql/src/test/queries/clientpositive/llap_acid.q +++ ql/src/test/queries/clientpositive/llap_acid.q @@ -27,7 +27,7 @@ select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limi insert into table orc_llap partition (csmallint = 2) select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10; -alter table orc_llap SET TBLPROPERTIES ('transactional'='true'); +alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); insert into table orc_llap partition (csmallint = 3) select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10; diff --git ql/src/test/queries/clientpositive/udf_character_length.q ql/src/test/queries/clientpositive/udf_character_length.q new file mode 100644 index 0000000..44447ba --- /dev/null +++ ql/src/test/queries/clientpositive/udf_character_length.q @@ -0,0 +1,24 @@ +set hive.fetch.task.conversion=more; + +DESCRIBE FUNCTION character_length; +DESCRIBE FUNCTION EXTENDED character_length; + +DESCRIBE FUNCTION char_length; +DESCRIBE FUNCTION EXTENDED char_length; + +CREATE TABLE dest1(len INT); +EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value); +FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value); +SELECT dest1.* FROM dest1; +DROP TABLE dest1; + +-- Test with non-ascii characters. +CREATE TABLE dest1(name STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1; +INSERT INTO dest1 VALUES(NULL); + +EXPLAIN SELECT character_length(dest1.name) FROM dest1; +SELECT character_length(dest1.name) FROM dest1; + +EXPLAIN SELECT char_length(dest1.name) FROM dest1; +SELECT char_length(dest1.name) FROM dest1; diff --git ql/src/test/queries/clientpositive/udf_octet_length.q ql/src/test/queries/clientpositive/udf_octet_length.q new file mode 100644 index 0000000..6a8f200 --- /dev/null +++ ql/src/test/queries/clientpositive/udf_octet_length.q @@ -0,0 +1,17 @@ +set hive.fetch.task.conversion=more; + +DESCRIBE FUNCTION octet_length; +DESCRIBE FUNCTION EXTENDED octet_length; + +CREATE TABLE dest1(len INT); +EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value); +FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value); +SELECT dest1.* FROM dest1; +DROP TABLE dest1; + +-- Test with non-ascii characters. 
+CREATE TABLE dest1(name STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1; +INSERT INTO dest1 VALUES(NULL); +EXPLAIN SELECT octet_length(dest1.name) FROM dest1; +SELECT octet_length(dest1.name) FROM dest1; diff --git ql/src/test/queries/clientpositive/vector_udf_character_length.q ql/src/test/queries/clientpositive/vector_udf_character_length.q new file mode 100644 index 0000000..1219d36 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_udf_character_length.q @@ -0,0 +1,25 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +DESCRIBE FUNCTION character_length; +DESCRIBE FUNCTION EXTENDED character_length; + +DESCRIBE FUNCTION char_length; +DESCRIBE FUNCTION EXTENDED char_length; + +CREATE TABLE dest1(len INT); +EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value); +FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value); +SELECT dest1.* FROM dest1; +DROP TABLE dest1; + +-- Test with non-ascii characters. +CREATE TABLE dest1(name STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1; +INSERT INTO dest1 VALUES(NULL); + +EXPLAIN SELECT character_length(dest1.name) FROM dest1; +SELECT character_length(dest1.name) FROM dest1; + +EXPLAIN SELECT char_length(dest1.name) FROM dest1; +SELECT char_length(dest1.name) FROM dest1; diff --git ql/src/test/queries/clientpositive/vector_udf_octet_length.q ql/src/test/queries/clientpositive/vector_udf_octet_length.q new file mode 100644 index 0000000..cfac289 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_udf_octet_length.q @@ -0,0 +1,18 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +DESCRIBE FUNCTION octet_length; +DESCRIBE FUNCTION EXTENDED octet_length; + +CREATE TABLE dest1(len INT); +EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value); +FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value); +SELECT dest1.* FROM dest1; +DROP TABLE dest1; + +-- Test with non-ascii characters. 
+CREATE TABLE dest1(name STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1; +INSERT INTO dest1 VALUES(NULL); +EXPLAIN SELECT octet_length(dest1.name) FROM dest1; +SELECT octet_length(dest1.name) FROM dest1; diff --git ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out index d05bf64..e54fdec 100644 --- ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out +++ ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out @@ -76,7 +76,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index b7679f1..1012ad9 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -106,7 +106,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -199,7 +199,7 @@ STAGE PLANS: Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col3 (type: string) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -388,7 +388,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -480,7 +480,7 @@ STAGE PLANS: Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -678,7 +678,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -772,7 +772,7 @@ STAGE PLANS: Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col4 (type: int) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -895,7 +895,7 @@ STAGE PLANS: Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), _col2 (type: int) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1091,7 +1091,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToInteger(_col0) (type: int) Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 
Execution mode: llap Reduce Operator Tree: @@ -1184,7 +1184,7 @@ STAGE PLANS: Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int) Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1306,7 +1306,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1503,7 +1503,7 @@ STAGE PLANS: Statistics: Num rows: 800 Data size: 280800 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), 'bar' (type: string) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1597,7 +1597,7 @@ STAGE PLANS: Statistics: Num rows: 1600 Data size: 561600 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), 'bar' (type: string) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/sqlmerge.q.out ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 2a3d7db..0f316f9 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -60,7 +60,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: ROW__ID (type: struct) Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Map 7 Map Operator Tree: TableScan @@ -298,7 +298,7 @@ STAGE PLANS: Map-reduce partition columns: a (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: llap - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Map 4 Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/llap_acid.q.out ql/src/test/results/clientpositive/llap_acid.q.out index 5970fd7..e53c3b9 100644 --- ql/src/test/results/clientpositive/llap_acid.q.out +++ ql/src/test/results/clientpositive/llap_acid.q.out @@ -50,11 +50,11 @@ POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc) POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true') +PREHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@orc_llap PREHOOK: Output: default@orc_llap -POSTHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true') +POSTHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@orc_llap POSTHOOK: Output: default@orc_llap @@ -105,7 +105,7 
@@ STAGE PLANS: Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) @@ -230,7 +230,7 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized - LLAP IO: may be used (ACID table) + LLAP IO: all inputs Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index 3c9bb4a..2c327c7 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -43,6 +43,8 @@ case cbrt ceil ceiling +char_length +character_length chr coalesce collect_list @@ -168,6 +170,7 @@ not ntile nullif nvl +octet_length or parse_url parse_url_tuple @@ -269,6 +272,8 @@ case cbrt ceil ceiling +char_length +character_length chr coalesce collect_list diff --git ql/src/test/results/clientpositive/udf_character_length.q.out ql/src/test/results/clientpositive/udf_character_length.q.out new file mode 100644 index 0000000..a2d92f5 --- /dev/null +++ ql/src/test/results/clientpositive/udf_character_length.q.out @@ -0,0 +1,261 @@ +PREHOOK: query: DESCRIBE FUNCTION character_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION character_length +POSTHOOK: type: DESCFUNCTION +character_length(str | binary) - Returns the number of characters in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED character_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED character_length +POSTHOOK: type: DESCFUNCTION +character_length(str | binary) - Returns the number of characters in str or binary data +Synonyms: char_length +Example: + > SELECT character_length('HUX8�') FROM src LIMIT 1; + 5 +Function class:org.apache.hadoop.hive.ql.udf.GenericUDFCharacterLength +Function type:BUILTIN +PREHOOK: query: DESCRIBE FUNCTION char_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION char_length +POSTHOOK: type: DESCFUNCTION +char_length(str | binary) - Returns the number of characters in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED char_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED char_length +POSTHOOK: type: DESCFUNCTION +char_length(str | binary) - Returns the number of characters in str or binary data +Synonyms: character_length +Example: + > SELECT char_length('HUX8�') FROM src LIMIT 1; + 5 +Function class:org.apache.hadoop.hive.ql.udf.GenericUDFCharacterLength +Function type:BUILTIN +PREHOOK: query: CREATE TABLE dest1(len INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(len INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(value) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +7 +0 +7 +6 +7 +7 +7 +7 +6 +7 +7 +7 +7 +7 +7 +0 +0 +6 +0 +7 +7 +7 +0 +0 +0 +PREHOOK: query: DROP TABLE dest1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: DROP TABLE dest1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@dest1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dest1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dest1 +PREHOOK: query: INSERT INTO dest1 VALUES(NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT INTO dest1 VALUES(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN SELECT character_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT character_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: SELECT character_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT character_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +2 +PREHOOK: query: EXPLAIN SELECT char_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT char_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: SELECT char_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT char_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +2 diff --git ql/src/test/results/clientpositive/udf_octet_length.q.out ql/src/test/results/clientpositive/udf_octet_length.q.out new file mode 100644 index 0000000..035d90e --- /dev/null +++ ql/src/test/results/clientpositive/udf_octet_length.q.out @@ -0,0 +1,213 @@ +PREHOOK: query: DESCRIBE FUNCTION octet_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION octet_length +POSTHOOK: type: DESCFUNCTION +octet_length(str | binary) - Returns the number of bytes in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED octet_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED octet_length +POSTHOOK: type: DESCFUNCTION +octet_length(str | binary) - Returns the number of bytes in str or binary data +Example: + > SELECT octet_length('HUX8�') FROM src LIMIT 1; + 15 +Function class:org.apache.hadoop.hive.ql.udf.GenericUDFOctetLength +Function type:BUILTIN +PREHOOK: query: CREATE TABLE dest1(len INT) +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(len INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: octet_length(value) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +7 +0 +7 +6 +7 +7 +7 +7 +6 +7 +7 +7 +7 +7 +7 +0 +0 +6 +0 +7 +7 +7 +0 +0 +0 +PREHOOK: query: DROP TABLE dest1 +PREHOOK: type: DROPTABLE 
+PREHOOK: Input: default@dest1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: DROP TABLE dest1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dest1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dest1 +PREHOOK: query: INSERT INTO dest1 VALUES(NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT INTO dest1 VALUES(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN SELECT octet_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT octet_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: octet_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: SELECT octet_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT octet_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +6 diff --git ql/src/test/results/clientpositive/vector_udf_character_length.q.out ql/src/test/results/clientpositive/vector_udf_character_length.q.out new file mode 100644 index 0000000..02a9bc4 --- /dev/null +++ ql/src/test/results/clientpositive/vector_udf_character_length.q.out @@ -0,0 +1,285 @@ +PREHOOK: query: DESCRIBE FUNCTION character_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION character_length +POSTHOOK: type: DESCFUNCTION +character_length(str | binary) - Returns the number of characters in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED character_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED character_length +POSTHOOK: type: DESCFUNCTION +character_length(str | binary) - Returns the number of characters in str or binary data +Synonyms: char_length +Example: + > SELECT character_length('HUX8�') FROM src LIMIT 1; + 5 +Function class:org.apache.hadoop.hive.ql.udf.GenericUDFCharacterLength +Function type:BUILTIN +PREHOOK: query: DESCRIBE FUNCTION char_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION char_length +POSTHOOK: type: DESCFUNCTION +char_length(str | binary) - Returns the number of characters in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED char_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED char_length +POSTHOOK: type: DESCFUNCTION 
+char_length(str | binary) - Returns the number of characters in str or binary data +Synonyms: character_length +Example: + > SELECT char_length('HUX8�') FROM src LIMIT 1; + 5 +Function class:org.apache.hadoop.hive.ql.udf.GenericUDFCharacterLength +Function type:BUILTIN +PREHOOK: query: CREATE TABLE dest1(len INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(len INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(value) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +7 +0 +7 +6 +7 +7 +7 +7 +6 +7 +7 +7 +7 +7 +7 +0 +0 +6 +0 +7 +7 +7 +0 +0 +0 +PREHOOK: query: DROP TABLE dest1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: DROP TABLE dest1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dest1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dest1 +PREHOOK: query: INSERT INTO dest1 VALUES(NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT INTO dest1 VALUES(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN SELECT character_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT character_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT character_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT character_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +2 +PREHOOK: query: EXPLAIN SELECT char_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT char_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT char_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT char_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +2 diff --git ql/src/test/results/clientpositive/vector_udf_octet_length.q.out ql/src/test/results/clientpositive/vector_udf_octet_length.q.out new file mode 100644 index 0000000..5436fa6 --- /dev/null +++ ql/src/test/results/clientpositive/vector_udf_octet_length.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: DESCRIBE FUNCTION octet_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION octet_length +POSTHOOK: type: DESCFUNCTION +octet_length(str | binary) - Returns the number of bytes in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED octet_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED octet_length +POSTHOOK: type: DESCFUNCTION +octet_length(str | binary) - Returns the number of bytes in str or binary data +Example: + > SELECT octet_length('HUX8�') FROM src LIMIT 1; + 15 +Function class:org.apache.hadoop.hive.ql.udf.GenericUDFOctetLength +Function type:BUILTIN +PREHOOK: query: CREATE TABLE dest1(len INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(len INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: octet_length(value) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +7 +0 +7 +6 +7 +7 +7 +7 +6 +7 +7 +7 +7 +7 +7 +0 +0 +6 +0 +7 +7 +7 +0 +0 +0 +PREHOOK: query: DROP TABLE dest1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: DROP TABLE dest1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dest1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dest1 +PREHOOK: query: INSERT INTO dest1 VALUES(NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT INTO dest1 VALUES(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN SELECT octet_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT octet_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: octet_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT octet_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT octet_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +6
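
The two counting rules exercised by the new .q tests above can be reproduced outside of Hive. The standalone sketch below is illustrative only and is not part of this patch: it mirrors the byte count returned by GenericUDFOctetLength and the character count computed by the rewritten UDFLength, which counts UTF-8 start bytes (the role of GenericUDFUtils.isUtfStartByte). The class and helper names here are hypothetical, and the sketch assumes UTF-8 encoding explicitly, whereas the UDFs call String.getBytes() with the JVM's default charset.

import java.nio.charset.StandardCharsets;

/**
 * Standalone illustration (not part of the patch) of the two length semantics
 * covered by this diff:
 *   - octet_length(str): number of bytes in the UTF-8 encoding of str
 *   - length(str): number of characters, counted as UTF-8 start bytes
 *     (any byte that is not a 10xxxxxx continuation byte)
 */
public class LengthSemanticsSketch {

  // Same idea as GenericUDFUtils.isUtfStartByte: continuation bytes look like 10xxxxxx.
  static boolean isUtfStartByte(byte b) {
    return (b & 0xC0) != 0x80;
  }

  // Byte count, matching the '안녕하세요' -> 15 example in the GenericUDFOctetLength @Description.
  static int octetLength(String s) {
    return s.getBytes(StandardCharsets.UTF_8).length;
  }

  // Character count, matching the start-byte loop in the rewritten UDFLength.evaluate.
  static int characterLength(String s) {
    byte[] data = s.getBytes(StandardCharsets.UTF_8);
    int len = 0;
    for (byte b : data) {
      if (isUtfStartByte(b)) {
        len++;
      }
    }
    return len;
  }

  public static void main(String[] args) {
    String greeting = "\uC548\uB155\uD558\uC138\uC694"; // 안녕하세요, the greeting used in the @Description examples
    System.out.println(characterLength(greeting)); // 5
    System.out.println(octetLength(greeting));     // 15
    System.out.println(characterLength("Hive"));   // 4 (ASCII: characters == bytes)
    System.out.println(octetLength("Hive"));       // 4
  }
}

Counting start bytes yields the number of code points without decoding the string, which is why the patched UDFLength can work directly on the byte[] it already holds instead of materializing a Text or String value first.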