diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 02367eb..c8fd9e1 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2936,6 +2936,22 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Exceeding this will trigger a flush irrelevant of memory pressure condition."), HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1, "Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded."), + HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED( + "hive.vectorized.execution.groupby.native.enabled", true, + "This flag should be set to true to enable the native vectorization of queries using GroupBy.\n" + + "The default value is true."), + HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE( + "hive.test.vectorized.execution.groupby.native.override", + "none", new StringSet("none", "enable", "disable"), + "internal use only, used to override the hive.vectorized.execution.groupby.native.enabled\n" + + "setting and force it on or off. The default is do nothing, of course", + true), + HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE( + "hive.test.vectorized.groupby.native.max.memory.available", -1, + "internal use only, used for creating different vectorized hash table sizes\n" + + "to exercise more logic\n" + + "The default value is -1 which means don't use it", + true), HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED("hive.vectorized.execution.reducesink.new.enabled", true, "This flag should be set to true to enable the new vectorization\n" + "of queries using ReduceSink.\ni" + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java new file mode 100644 index 0000000..5cce41e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java @@ -0,0 +1,170 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
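The three new ConfVars above follow the existing HiveConf pattern (default value, optional StringSet validator, and the trailing boolean marking the two test settings as internal). The hunk that consumes them is not part of this excerpt, so the following is only a sketch of how a caller might combine the test-only override with the public flag and the test memory limit, using the standard HiveConf accessors; the class and method names are illustrative, not part of the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class NativeGroupBySettingsSketch {

  // Resolve the effective native-GroupBy flag: the test-only override ("enable"/"disable")
  // wins when set; "none" falls back to the public flag.
  public static boolean isNativeGroupByEnabled(Configuration conf) {
    String override =
        HiveConf.getVar(conf, ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE);
    if ("enable".equalsIgnoreCase(override)) {
      return true;
    }
    if ("disable".equalsIgnoreCase(override)) {
      return false;
    }
    return HiveConf.getBoolVar(conf, ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED);
  }

  // -1 means "no test limit"; otherwise the test value replaces the real memory budget,
  // which is how VectorGroupByHashCommon.initializeOp later in this patch treats it.
  public static long effectiveMaxMemoryAvailable(Configuration conf, long realMaxMemoryAvailable) {
    int testMax = HiveConf.getIntVar(conf,
        ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE);
    return (testMax == -1) ? realMaxMemoryAvailable : testMax;
  }
}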
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby; + +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; + +/** + * This class is common operator class of Native Vectorized GroupBy that has common + * initialization logic. + */ +public abstract class VectorGroupByCommon + extends Operator + implements VectorizationContextRegion, VectorizationOperator { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorGroupByCommon.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected VectorGroupByDesc vectorDesc; + + protected VectorGroupByInfo vectorGroupByInfo; + + protected VectorizationContext vContext; + + // Create a new outgoing vectorization context because column name map will change. + protected VectorizationContext vOutContext; + + protected VectorExpression[] groupByKeyExpressions; + + protected VectorAggregationDesc[] vectorAggregationDescs; + + protected AggregationVariation aggregationVariation; + protected SingleCountAggregation singleCountAggregation; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + + // For debug tracing: the name of the map or reduce task. + protected transient String taskName; + + // Debug display. 
+ protected transient long batchCounter; + + public VectorGroupByCommon() { + super(); + } + + public static int INT_PER_LONG_COUNT = Long.SIZE / Integer.SIZE; + + public VectorGroupByCommon(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx); + + GroupByDesc desc = (GroupByDesc) conf; + this.conf = desc; + this.vectorDesc = (VectorGroupByDesc) vectorDesc; + vectorGroupByInfo = this.vectorDesc.getVectorGroupByInfo(); + + this.vContext = vContext; + + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); + + groupByKeyExpressions = this.vectorDesc.getKeyExpressions(); + + vectorAggregationDescs = this.vectorDesc.getVecAggrDescs(); + + aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + singleCountAggregation = vectorGroupByInfo.getSingleCountAggregation(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + // Determine the name of our map or reduce task for debug tracing. + BaseWork work = Utilities.getMapWork(hconf); + if (work == null) { + work = Utilities.getReduceWork(hconf); + } + taskName = work.getName(); + + batchCounter = 0; + } + + /** + * Implements the getName function for the Node Interface. + * + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "GBY"; + } + + @Override + public VectorizationContext getOutputVectorizationContext() { + return vOutContext; + } + + @Override + public VectorizationContext getInputVectorizationContext() { + return vContext; + } + + @Override + public VectorDesc getVectorDesc() { + return vectorDesc; + } + + @Override + public OperatorType getType() { + return OperatorType.GROUPBY; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java new file mode 100644 index 0000000..277aa25 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; + +import com.sun.tools.javac.util.ArrayUtils; + +/** + * This class is common operator class of Native Vectorized GroupBy for output generation. + * Taking the aggregations and filling up the output batch. + */ +public abstract class VectorGroupByCommonOutput + extends VectorGroupByCommon { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorGroupByCommonOutput.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient VectorizedRowBatch outputBatch; + + private transient VectorizedRowBatchCtx vrbCtx; + + private transient TypeInfo[] outputTypes; + + private transient StandardStructObjectInspector standardOutputObjInspector; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByCommonOutput() { + super(); + } + + public VectorGroupByCommonOutput(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + List objectInspectors = new ArrayList(); + + List outputFieldNames = conf.getOutputColumnNames(); + + final int keyCount = (groupByKeyExpressions == null ? 0 : groupByKeyExpressions.length); + final int aggrCount = (vectorAggregationDescs == null ? 0 : vectorAggregationDescs.length); + outputTypes = new TypeInfo[keyCount + aggrCount]; + int outputTypesIndex = 0; + + for(int i = 0; i < keyCount; ++i) { + TypeInfo outputTypeInfo = groupByKeyExpressions[i].getOutputTypeInfo(); + outputTypes[outputTypesIndex++] = outputTypeInfo; + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + outputTypeInfo); + objectInspectors.add(objInsp); + } + + for(int i = 0; i < aggrCount; ++i) { + TypeInfo outputTypeInfo = vectorAggregationDescs[i].getOutputTypeInfo(); + outputTypes[outputTypesIndex++] = outputTypeInfo; + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo); + objectInspectors.add(objInsp); + } + + standardOutputObjInspector = + ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors); + outputObjInspector = standardOutputObjInspector; + + /** + * Setup the output batch and vectorization context for downstream operators. + * + * NOTE: If we cannot do vectorized output, we still use the outputBatch while + * pulling information out of the hash table. In forwardOutputBatch, we extract rows + * and forward them one by one... + */ + vrbCtx = new VectorizedRowBatchCtx(); + vrbCtx.init(standardOutputObjInspector, vOutContext.getScratchColumnTypeNames()); + outputBatch = vrbCtx.createVectorizedRowBatch(); + } + + public void forwardOutputBatch(VectorizedRowBatch outputBatch) throws HiveException { + + forward(outputBatch, null); + + outputBatch.reset(); + } + + /** + * Copy all of the keys and aggregations to the output batch. + */ + protected abstract void outputGroupBy() throws HiveException; + + /** + * On close, make sure a partially filled overflow batch gets forwarded. + */ + @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + if (!aborted) { + outputGroupBy(); + if (outputBatch.size > 0) { + forwardOutputBatch(outputBatch); + } + } + LOG.debug("VectorGroupByCommonOutputOperator closeOp " + batchCounter + " batches processed"); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java new file mode 100644 index 0000000..1733713 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
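VectorGroupByCommonOutput.initializeOp above builds the output struct by walking the key expressions first and the aggregations second, converting each output TypeInfo to a standard writable ObjectInspector. A minimal stand-alone sketch of that conversion for a bigint key plus a bigint count; the column names are illustrative only.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class GroupByOutputSchemaSketch {
  public static void main(String[] args) {
    // Keys first, then aggregations -- the same ordering initializeOp uses.
    List<String> outputFieldNames = Arrays.asList("_col0", "_col1");
    TypeInfo[] outputTypes = { TypeInfoFactory.longTypeInfo, TypeInfoFactory.longTypeInfo };

    List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();
    for (TypeInfo typeInfo : outputTypes) {
      objectInspectors.add(
          TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    StandardStructObjectInspector standardOutputObjInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors);

    // Prints struct<_col0:bigint,_col1:bigint>
    System.out.println(standardOutputObjInspector.getTypeName());
  }
}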
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.VectorGroupByCommonOutput; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is common hash operator class of Native Vectorized GroupBy for hash related + * initialization logic. + */ +public abstract class VectorGroupByHashCommon + extends VectorGroupByCommonOutput { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorGroupByHashCommon.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient long maxHashTableMemory; + + protected transient int floorPowerOf2MaxHashTableMemory; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashCommon() { + super(); + } + + public VectorGroupByHashCommon(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + final float memoryPercentage = conf.getGroupByMemoryUsage(); + final int testMaxMemoryAvailable = vectorGroupByInfo.getTestGroupByMaxMemoryAvailable(); + final long maxMemoryAvailable = + (testMaxMemoryAvailable == -1 ? + conf.getMaxMemoryAvailable() : testMaxMemoryAvailable); + maxHashTableMemory = (long) (memoryPercentage * maxMemoryAvailable); + floorPowerOf2MaxHashTableMemory = floorPowerOf2(maxHashTableMemory); + } + + /* + * Return the power of 2 that is equal to or next below a value. + * + * Example: + * 100000b = 2^5 = 32 + * where Long.numberOfLeadingZeros returns (64 - 6) = 58 + * and the result = 5. + * + * Replacing any set of lower 0's with 1's doesn't change the result. + * Or, numbers 32 to 63 return 5. 
+ * + */ + public static int floorPowerOf2(long a) { + if (a == 0) { + return 0; + } + final int floorLeadingZerosCount = Long.numberOfLeadingZeros(a); + final int result = Long.SIZE - floorLeadingZerosCount - 1; + return result; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java new file mode 100644 index 0000000..8a6c6dc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; + +/** + * This class is common hash operator class of Native Vectorized GroupBy with common operator + * logic for checking key limits and the common process method logic. + */ +public abstract class VectorGroupByHashOperatorBase + extends VectorGroupByHashTable { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorGroupByHashOperatorBase.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private long flushAndStartOverCount; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
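Despite its name, floorPowerOf2 above returns the exponent (the floor of log2), not the power-of-two value itself: inputs 32 through 63 all yield 5, matching the comment. A self-contained sketch of the same arithmetic, including how a memory budget feeds into it; the 25% and 1 GB figures are example values only, not defaults taken from the patch.

public class FloorPowerOf2Sketch {

  // Same arithmetic as VectorGroupByHashCommon.floorPowerOf2: floor(log2(a)), 0 for 0.
  static int floorPowerOf2(long a) {
    if (a == 0) {
      return 0;
    }
    return Long.SIZE - Long.numberOfLeadingZeros(a) - 1;
  }

  public static void main(String[] args) {
    // Values 32..63 (100000b .. 111111b) all share the floor exponent 5.
    System.out.println(floorPowerOf2(32));   // 5
    System.out.println(floorPowerOf2(63));   // 5

    // Example budget: 25% of a 1 GB maxMemoryAvailable, the same float multiplication
    // used for maxHashTableMemory above.
    long maxHashTableMemory = (long) (0.25f * (1L << 30));
    System.out.println(floorPowerOf2(maxHashTableMemory));   // 28
  }
}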
+ // + + public VectorGroupByHashOperatorBase() { + super(); + } + + public VectorGroupByHashOperatorBase(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + flushAndStartOverCount = 0; + } + + public long getFlushAndStartOverCount() { + return flushAndStartOverCount; + } + + protected void checkKeyLimitOncePerBatch(final int inputLogicalSize) + throws HiveException, IOException { + + /* + * Check the hash table key limit for doing the worst case of adding all keys outside the + * inner loop for better performance. + */ + if (keyCount + inputLogicalSize > hashTableKeyCountLimit) { + flushAndStartOverCount++; + flushAndStartOver(); + if (keyCount + inputLogicalSize > hashTableKeyCountLimit) { + raise2ndHitOutOfStorage(); + } + } + } + + protected abstract void doBeforeMainLoopWork(final int inputLogicalSize) + throws HiveException, IOException; + + protected abstract void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException; + + @Override + public void process(Object row, int tag) throws HiveException { + + try { + VectorizedRowBatch batch = (VectorizedRowBatch) row; + + batchCounter++; + + final int inputLogicalSize = batch.size; + + if (inputLogicalSize == 0) { + if (LOG.isDebugEnabled()) { + LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); + } + return; + } + + /* + * Perform any key expressions. Results will go into scratch columns. + */ + if (groupByKeyExpressions != null) { + for (VectorExpression ve : groupByKeyExpressions) { + ve.evaluate(batch); + } + } + + doBeforeMainLoopWork(inputLogicalSize); + + doMainLoop(batch, inputLogicalSize); + + } catch (Exception e) { + throw new HiveException(e); + } + } + + protected void raise2ndHitOutOfStorage() throws HiveException { + throw new HiveException( + "After flushing hash table and clearing, there still isn't enough storage?"); + } + + protected void flushAndStartOver() throws HiveException, IOException { + + outputGroupByAndClearAll(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java new file mode 100644 index 0000000..aaa5bfc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
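checkKeyLimitOncePerBatch in VectorGroupByHashOperatorBase above assumes the worst case that every row of the incoming batch is a brand-new key, so the limit is tested once per batch instead of once per row. A stand-alone sketch of that control flow; the field names mirror the operator's, but flush() is a hypothetical stand-in for flushAndStartOver()/outputGroupByAndClearAll().

public class KeyLimitSketch {
  int keyCount;
  long flushAndStartOverCount;
  final int hashTableKeyCountLimit;

  KeyLimitSketch(int limit) {
    this.hashTableKeyCountLimit = limit;
  }

  void checkKeyLimitOncePerBatch(int inputLogicalSize) {
    // Worst case: every row in the batch introduces a new key.
    if (keyCount + inputLogicalSize > hashTableKeyCountLimit) {
      flushAndStartOverCount++;
      flush();   // drain aggregations to the output and clear the table
      if (keyCount + inputLogicalSize > hashTableKeyCountLimit) {
        // Even an empty table cannot hold one batch worth of new keys.
        throw new IllegalStateException(
            "After flushing hash table and clearing, there still isn't enough storage?");
      }
    }
  }

  void flush() {
    keyCount = 0;
  }

  public static void main(String[] args) {
    KeyLimitSketch sketch = new KeyLimitSketch(2048);
    sketch.keyCount = 1500;
    sketch.checkKeyLimitOncePerBatch(1024);       // flushes once, then the batch fits
    System.out.println(sketch.flushAndStartOverCount);   // 1
  }
}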
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; + +/** + * This class is common hash operator class of Native Vectorized GroupBy for the hash tables. + */ +public abstract class VectorGroupByHashTable + extends VectorGroupByHashCommon { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorGroupByHashTable.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + // The logical size and power of 2 mask of the hash table + protected transient int logicalHashBucketCount; + protected transient int logicalHashBucketMask; + + // The maximum number of keys we'll keep in the hash table before flushing. + protected transient int hashTableKeyCountLimit; + + protected transient long[] slotMultiples; + + protected transient int keyCount; + protected transient int largestNumberOfSteps; + protected transient int metricPutConflict; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashTable() { + super(); + } + + public VectorGroupByHashTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + protected void clearHashTable() { + Arrays.fill(slotMultiples, 0, slotPhysicalArraySize, 0); + keyCount = 0; + largestNumberOfSteps = 0; + metricPutConflict = 0; + } + + public void outputGroupByAndClearAll() throws HiveException, IOException { + + outputGroupBy(); + + clearHashTable(); + } + + // The number of longs in the hash table slot array. It is the logical size * entries per slot. + protected int slotPhysicalArraySize; + + // Since a maximum integer is 2^N - 1 it cannot be used we need one less than number of + // Integer bits. + private final static int MAX_POWER_OF_2_FOR_INT_INDEXING = Integer.SIZE - 1; + + // Make sure we have comfortable room for at least one batch of new keys to support the + // VectorGroupByHashOperatorBase.checkKeyLimitOncePerBatch method. + private final static int MIN_POWER_OF_2_SIZE = + floorPowerOf2(VectorizedRowBatch.DEFAULT_SIZE * 16L); + + // An arbitrary factor to divide the slot table size by to get the key count limit. + // Hitting the key count limit will cause the hash table to be flushed to Reduce and cleared + // for refilling. + private final static int KEY_COUNT_FACTOR = 8; + + /* + * For now, we are just allocating the slot table array. + * FUTURE: We'll need to revisit these calculations when we support STRING keys. 
+ */ + protected void allocateBucketArray(int multiplier) { + + // No matter how much memory they want to give us, our array is limited to int indexing. + final int maxPowerOf2Memory = + Math.min(floorPowerOf2MaxHashTableMemory, MAX_POWER_OF_2_FOR_INT_INDEXING); + + final int powerOf2Memory = + Math.max(maxPowerOf2Memory, MIN_POWER_OF_2_SIZE); + + /* + * CONCERN: + * Do we really want a hash table to use the maximum supplied memory immediately? + * That could waste memory that other operators could use. And, cause Java GC + * issues because of how large the single slot table array is. Large hash tables + * with small keys sets could cause lots of unnecessary cold RAM hits. There is a tension + * here, of course. Too small a table and there will be more insert collisions. + * + * In contrast, the current VectorGroupByOperator and GroupByOperator classes use a + * Java HeapMap which automatically grows over time. + * + * The issues here are similar to MapJoin, except we have the possibility of using a smaller + * hash table and flushing everything to Reduce. Then, creating a larger slot table instead + * of zeroing the current one. MapJoin cannot flush -- it either needs to expand its + * hash tables to hold everything or spill some of the data to secondary storage (Hybrid Grace). + */ + + slotPhysicalArraySize = 1 << powerOf2Memory; + + logicalHashBucketCount = slotPhysicalArraySize / multiplier; + logicalHashBucketMask = logicalHashBucketCount - 1; + + hashTableKeyCountLimit = logicalHashBucketCount / KEY_COUNT_FACTOR; + + slotMultiples = new long[slotPhysicalArraySize]; + + keyCount = 0; + largestNumberOfSteps = 0; + metricPutConflict = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyOperatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyOperatorBase.java new file mode 100644 index 0000000..57926a1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyOperatorBase.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
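A worked example of the allocateBucketArray sizing above, assuming VectorizedRowBatch.DEFAULT_SIZE of 1024, a multiplier of 2 (two longs per slot), and a memory exponent of 28 from floorPowerOf2MaxHashTableMemory; the numbers are illustrative, not taken from a real configuration.

public class BucketArraySizingSketch {
  public static void main(String[] args) {
    final int MAX_POWER_OF_2_FOR_INT_INDEXING = Integer.SIZE - 1;                     // 31
    final int MIN_POWER_OF_2_SIZE = Long.SIZE - Long.numberOfLeadingZeros(1024 * 16L) - 1; // 14
    final int KEY_COUNT_FACTOR = 8;

    int floorPowerOf2MaxHashTableMemory = 28;   // example memory exponent
    int multiplier = 2;                         // example: two longs per slot

    // Clamp to int indexing, then raise to the minimum comfortable size.
    int maxPowerOf2Memory =
        Math.min(floorPowerOf2MaxHashTableMemory, MAX_POWER_OF_2_FOR_INT_INDEXING);   // 28
    int powerOf2Memory = Math.max(maxPowerOf2Memory, MIN_POWER_OF_2_SIZE);            // 28

    int slotPhysicalArraySize = 1 << powerOf2Memory;                     // 268,435,456 longs
    int logicalHashBucketCount = slotPhysicalArraySize / multiplier;     // 134,217,728 buckets
    int logicalHashBucketMask = logicalHashBucketCount - 1;
    int hashTableKeyCountLimit = logicalHashBucketCount / KEY_COUNT_FACTOR;   // 16,777,216 keys

    System.out.println(slotPhysicalArraySize + " " + logicalHashBucketCount + " "
        + logicalHashBucketMask + " " + hashTableKeyCountLimit);
  }
}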
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; + +/** + * This class is common one long key hash operator class of Native Vectorized GroupBy with common + * operator logic for one long key main loop logic. + */ +public abstract class VectorGroupByHashLongKeyOperatorBase + extends VectorGroupByHashLongKeyTable { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorGroupByHashLongKeyOperatorBase.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyOperatorBase() { + super(); + } + + public VectorGroupByHashLongKeyOperatorBase(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyTable.java new file mode 100644 index 0000000..b73898f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyTable.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; + +/* + * An single long key map optimized for Native Vectorized GroupByy. + */ +public abstract class VectorGroupByHashLongKeyTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorGroupByHashLongKeyTable.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected int longKeyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyTable() { + super(); + + longKeyColumnNum = -1; + } + + public VectorGroupByHashLongKeyTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + longKeyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountColumnOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountColumnOperator.java new file mode 100644 index 0000000..6db4995 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountColumnOperator.java @@ -0,0 +1,890 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
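The file introduced next, VectorGroupByHashLongKeyCountColumnOperator, makes a single pass over the key column, coalescing runs of consecutive equal keys so the hash table is touched once per run rather than once per row, while counting the non-NULL values of the counted column within each run. A minimal stand-alone sketch of that run-detection pattern over plain arrays; emit() is a hypothetical stand-in for findOrCreateLongZeroCountKey, and NULL keys plus the selected[] indirection are omitted.

public class LongKeyRunDetectionSketch {

  interface KeyCountSink {
    void emit(long key, int nonNullCount);
  }

  static void countColumnForBatch(long[] keys, boolean[] countColIsNull, int size,
      KeyCountSink sink) {
    long currLongKey = keys[0];
    int currNonNullCount = (countColIsNull[0] ? 0 : 1);

    for (int i = 1; i < size; i++) {
      final long nextLongKey = keys[i];
      if (currLongKey == nextLongKey) {
        currNonNullCount += (countColIsNull[i] ? 0 : 1);
      } else {
        // Current run ended -- one hash table find/create for the whole run,
        // even when the count is 0 (the key must still appear in the output).
        sink.emit(currLongKey, currNonNullCount);
        currLongKey = nextLongKey;
        currNonNullCount = (countColIsNull[i] ? 0 : 1);
      }
    }
    // Last run.
    sink.emit(currLongKey, currNonNullCount);
  }

  public static void main(String[] args) {
    long[] keys = {7, 7, 7, 3, 3, 7};
    boolean[] isNull = {false, true, false, false, false, false};
    // Emits (7,2), (3,2), (7,1); the two runs of key 7 merge later inside the hash table.
    countColumnForBatch(keys, isNull, keys.length,
        (k, c) -> System.out.println(k + " -> " + c));
  }
}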
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * Specialized class for doing a COUNT(non-key-column) Native Vectorized GroupBy. That is, + * the grouping is being done on a long key and the counting is for a another ("non-key") + * column (which can be any data type). + * + * We make a single pass. We loop over key column and process the keys. We look for + * sequences of NULL keys or equal keys. And, at the same time do any processing for the + * non-key-column counting. + * + * NOTE: Both NULL and non-NULL keys have counts for non-key-columns. So, after counting the + * non-NULL fields for the non-key-column, we always do a hash table find/create even when the count + * is 0 since the all those keys must be part of the output result. + + // A key will get created even when there are no non-NULL column values. Count includes 0. + + findOrCreateLongZeroCountKey( + key, + longKeySeries.currentHashCode, + nonNullCount); + + */ +public class VectorGroupByHashLongKeyCountColumnOperator + extends VectorGroupByHashLongKeyZeroCountTable { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = + VectorGroupByHashLongKeyCountColumnOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + protected int countColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyCountColumnOperator() { + super(); + } + + public VectorGroupByHashLongKeyCountColumnOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + countColumnNum = singleCountAggregation.getCountColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * Both NULL and non-NULL keys have counts for non-key-columns. + * + * And when count == 0, we still must create an entry in the slot table. + * + * When non-key-column NO NULLS, the column count is simply inputLogicalSize. 
+ * When non-key-column REPEATING NULLS, the column count is simply 0. + * Otherwise, non-key-column NO REPEATING NULLS, we have to loop through and count the non-null + * rows. + * + */ + @Override + protected void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + /* + * First, determine the count of the non-key column for the whole batch which is covered by the + * repeating key. + */ + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + int nonKeyNonNullCount; + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: [REPEATING,] NO NULLS + nonKeyNonNullCount = inputLogicalSize; + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible + nonKeyNonNullCount = (nonKeyColVector.isNull[0] ? 0 : inputLogicalSize); + + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + nonKeyNonNullCount = 0; + if (batch.selectedInUse) { + + int[] selected = batch.selected; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + if (nonKeyIsNull[batchIndex]) { + nonKeyNonNullCount++; + } + } + } else { + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + if (nonKeyIsNull[batchIndex]) { + nonKeyNonNullCount++; + } + } + } + } + + /* + * Finally, use the non-key count for our non-NULL or NULL key. + */ + if (keyLongColVector.noNulls || !keyLongColVector.isNull[0]) { + + // Non-NULL key. + final long repeatingKey = keyLongColVector.vector[0]; + findOrCreateLongZeroCountKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + nonKeyNonNullCount); + } else { + + // All NULL key. Since we are counting a non-Key column, we must count it under the NULL + // entry. + haveNullKey = true; + nullKeyCount += nonKeyNonNullCount; + + } + } + + private void doLogicalNoNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, LongColumnVector keyLongColVector) + throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + + long currLongKey = keyVector[selected[0]]; + int duplicateKeyCount = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final long nextLongKey = keyVector[selected[logicalIndex]]; + if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New current key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + // Handle last key. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + + private void doLogicalNoNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, + final int inputLogicalSize, LongColumnVector keyLongColVector) + throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. 
+ + long currLongKey = keyVector[selected[0]]; + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final long nextLongKey = keyVector[selected[logicalIndex]]; + if (currLongKey == nextLongKey) { + + // No counting. + } else { + + // Current key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + 0); + + // New current key. + currLongKey = nextLongKey; + } + } + // Handle last key. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + 0); + } + + private void doLogicalNoNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, LongColumnVector keyLongColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + + int batchIndex = selected[0]; + long currLongKey = keyVector[batchIndex]; + int currNonNullCount = (nonKeyIsNull[batchIndex] ? 0 : 1); + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + batchIndex = selected[logicalIndex]; + final long nextLongKey = keyVector[batchIndex]; + if (currLongKey == nextLongKey) { + + currNonNullCount += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + + // New current key. + currLongKey = nextLongKey; + currNonNullCount = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + // Handle last key. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * In general, loop over key column and process the keys. Look for sequences of equal keys. And, + * at the same time do any processing for the non-key-column counting. + * + * When non-key-column NO NULLS, look for sequences of equal keys and determine + * duplicateKeyCount. The column count is simply duplicateKeyCount. + * + * When non-key-column REPEATING NULLS, scan for sequences of equal keys. The column count is + * simply 0 because of all NULL values -- but we still must create an entry in the slot table. + * + * Otherwise, non-key-column NO REPEATING NULLS, as we are looking for sequence of equal keys -- + * look over at the non-key-column and count non-null rows. Even when the non-null row count + * is 0, we still must create an entry in the slot table. + * + */ + @Override + protected void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: [REPEATING,] NO NULLS + + doLogicalNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyLongColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doLogicalNoNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyLongColVector); + + } else { + + // Non-NULL repeating non-key column. 
+ doLogicalNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyLongColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS. + + doLogicalNoNullsKeyNullsColumn(batch, inputLogicalSize, keyLongColVector, nonKeyColVector); + + } + } + + private void doLogicalNullsKeyNoNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + boolean[] keyIsNull = keyLongColVector.isNull; + + boolean currKeyIsNull; + long currLongKey; + final int firstBatchIndex = selected[0]; + if (keyIsNull[firstBatchIndex]) { + currKeyIsNull = true; + currLongKey = 0; + } else { + currKeyIsNull = false; + currLongKey = keyVector[firstBatchIndex]; + } + int duplicateKeyCount = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + duplicateKeyCount++; + } else { + + // Current non-NULL key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New NULL key. + currKeyIsNull = true; + duplicateKeyCount = 1; + } + + } else { + final long nextLongKey = keyVector[batchIndex]; + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += duplicateKeyCount; + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } else if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current non-NULL key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + } + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += duplicateKeyCount; + } else { + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + } + + private void doLogicalNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + boolean[] keyIsNull = keyLongColVector.isNull; + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + + boolean currKeyIsNull; + long currLongKey; + final int firstBatchIndex = selected[0]; + if (keyIsNull[firstBatchIndex]) { + currKeyIsNull = true; + currLongKey = 0; + } else { + currKeyIsNull = false; + currLongKey = keyVector[firstBatchIndex]; + } + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // No counting. + } else { + + // Current non-NULL key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + 0); + + // New NULL key. + currKeyIsNull = true; + } + + } else { + final long nextLongKey = keyVector[batchIndex]; + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + + // New non-NULL key. 
+ currLongKey = nextLongKey; + } else if (currLongKey == nextLongKey) { + + // No counting + } else { + + // Current non-NULL key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + 0); + + // New non-NULL key. + currLongKey = nextLongKey; + } + } + } + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + } else { + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + 0); + } + } + + private void doLogicalNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, LongColumnVector keyLongColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + boolean[] keyIsNull = keyLongColVector.isNull; + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + + boolean currKeyIsNull; + long currLongKey; + final int firstBatchIndex = selected[0]; + if (keyIsNull[firstBatchIndex]) { + currKeyIsNull = true; + currLongKey = 0; + } else { + currKeyIsNull = false; + currLongKey = keyVector[firstBatchIndex]; + } + int currNonNullCount = (nonKeyIsNull[firstBatchIndex] ? 0 : 1); + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + currNonNullCount += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + + // New NULL key. + currKeyIsNull = true; + currNonNullCount = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + + } else { + final long nextLongKey = keyVector[batchIndex]; + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += currNonNullCount; + + // New non-NULL key. + currLongKey = nextLongKey; + currNonNullCount = (nonKeyIsNull[batchIndex] ? 0 : 1); + } else if (currLongKey == nextLongKey) { + + currNonNullCount += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + + // New non-NULL key. + currLongKey = nextLongKey; + currNonNullCount = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + } + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += currNonNullCount; + } else { + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + } + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * Both NULL and non-NULL keys have counts for non-key-columns. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any processing for the non-key-column counting. + * + * When non-key-column NO NULLS, look for sequences of NULL keys or equal keys and determine + * duplicateKeyCount. The column count is simply duplicateKeyCount. + * + * When non-key-column REPEATING NULLS, scan for sequences of NULL keys or equal keys. The column + * count is simply 0 because of all NULL values -- but we still must create an entry in the + * slot table. 
+ * + * Otherwise, non-key-column NO REPEATING NULLS, as we are looking for sequence of NULL keys or + * equal keys -- look over at the non-key-column and count non-null rows. Even when the non-null + * row count is 0, we still must create an entry in the slot table. + * + * In all cases above, when its a NULL key, do NULL entry processing. + * + */ + @Override + protected void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: [REPEATING,] NO NULLS + + doLogicalNullsKeyNoNullsColumn(batch, inputLogicalSize, keyLongColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doLogicalNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyLongColVector); + + } else { + + // Non-NULL repeating non-key column. + doLogicalNullsKeyNoNullsColumn(batch, inputLogicalSize, keyLongColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + + doLogicalNullsKeyNullsColumn(batch, inputLogicalSize, keyLongColVector, nonKeyColVector); + + } + } + + //=============================================================================================== + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NO NULLS key case. + * + * (For remaining comments for handleLogicalNoNullsKey). + */ + @Override + protected void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + long[] keyVector = keyLongColVector.vector; + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: [REPEATING,] NO NULLS + + long currLongKey = keyVector[0]; + int duplicateKeyCount = 1; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + final long nextLongKey = keyVector[batchIndex]; + if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New current key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + // Handle last key. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + + long currLongKey = keyVector[0]; + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + final long nextLongKey = keyVector[batchIndex]; + if (currLongKey == nextLongKey) { + + // No counting. + } else { + + // Current key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + 0); + + // New current key. + currLongKey = nextLongKey; + } + } + // Handle last key. 
+ findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + 0); + } else { + + // Non-NULL repeating non-key column. + int currNonNullCount = 1; + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + final long nextLongKey = keyVector[batchIndex]; + if (currLongKey == nextLongKey) { + + currNonNullCount++; + } else { + + // Current key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + + // New current key. + currLongKey = nextLongKey; + currNonNullCount = 1; + } + } + // Handle last key. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + + long currLongKey = keyVector[0]; + int currNonNullCount = (nonKeyIsNull[0] ? 0 : 1); + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + final long nextLongKey = keyVector[batchIndex]; + if (currLongKey == nextLongKey) { + + currNonNullCount += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + + // New current key. + currLongKey = nextLongKey; + currNonNullCount = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + // Handle last key. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + } + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NULLS key case. + * + * (For remaining comments for handleLogicalNullsKey). + * + */ + @Override + protected void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + long[] keyVector = keyLongColVector.vector; + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + + boolean[] keyIsNull = keyLongColVector.isNull; + + boolean currKeyIsNull; + long currLongKey; + if (keyIsNull[0]) { + currKeyIsNull = true; + currLongKey = 0; + } else { + currKeyIsNull = false; + currLongKey = keyVector[0]; + } + + // UNDONE: We need repeating logic here for non-key column like handleLogicalNullsKey.. + + int currNonNullCount = (nonKeyIsNull[0] ? 0 : 1); + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + currNonNullCount += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + + // New NULL key. + currKeyIsNull = true; + currNonNullCount = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + + } else { + final long nextLongKey = keyVector[batchIndex]; + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += currNonNullCount; + + // New non-NULL key. + currLongKey = nextLongKey; + currNonNullCount = (nonKeyIsNull[batchIndex] ? 0 : 1); + } else if (currLongKey == nextLongKey) { + + currNonNullCount += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + + // New non-NULL key. 
+ currLongKey = nextLongKey; + currNonNullCount = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + } + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += currNonNullCount; + } else { + findOrCreateLongZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + currNonNullCount); + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountKeyOperator.java new file mode 100644 index 0000000..1fcdf1b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountKeyOperator.java @@ -0,0 +1,374 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * Specialized class for doing a COUNT(key-column) Native Vectorized GroupBy. That is, + * the grouping is being done on one long key and we are counting it. + * + * The NULL key is not represented in the hash table. We handle them as a special case. So, + * the find/create call for non-NULL keys looks like this: + + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + */ +public class VectorGroupByHashLongKeyCountKeyOperator + extends VectorGroupByHashLongKeyNonZeroCountTable { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = + VectorGroupByHashLongKeyCountKeyOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. 
+ + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyCountKeyOperator() { + super(); + } + + public VectorGroupByHashLongKeyCountKeyOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For all NULL keys case we note NULL key exists but leave its count as 0. + */ + @Override + protected void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + if (keyLongColVector.noNulls || !keyLongColVector.isNull[0]) { + final long repeatingKey = keyLongColVector.vector[0]; + findOrCreateLongNonZeroCountKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); + } else { + + // We note we encountered a repeating NULL key. But there will be no count for it -- + // just NULL. + haveNullKey = true; + } + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * Do find/create on each key with count duplicateKeyCount. + */ + @Override + protected void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + + long currLongKey = keyVector[selected[0]]; + int duplicateKeyCount = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final long nextLongKey = keyVector[selected[logicalIndex]]; + if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New current key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + // Handle last key. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * For all NULL keys cases we note NULL key exists but leave its count as 0. + * + * Do find/create on each non-NULL key with count duplicateKeyCount. + */ + @Override + protected void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + + boolean[] isNull = keyLongColVector.isNull; + + boolean currKeyIsNull; + long currLongKey; + int duplicateKeyCount; + final int firstBatchIndex = selected[0]; + if (isNull[firstBatchIndex]) { + currKeyIsNull = true; + currLongKey = 0; + duplicateKeyCount = 0; // We don't count NULLs for NULL key. + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. 
+ haveNullKey = true; + } else { + currKeyIsNull = false; + currLongKey = keyVector[firstBatchIndex]; + duplicateKeyCount = 1; + } + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (isNull[batchIndex]) { + + if (currKeyIsNull) { + + // We don't count NULLs for NULL key. + } else { + + // Current non-NULL key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New NULL key. + currKeyIsNull = true; + duplicateKeyCount = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } + + } else { + final long nextLongKey = keyVector[selected[logicalIndex]]; + if (currKeyIsNull) { + + // Current NULL key ended. We don't count NULLs for NULL key. + currKeyIsNull = false; + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } else if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current non-NULL key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + } + // Handle last key. + if (!currKeyIsNull) { + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NO NULLS key case. + * + * (For remaining comments for handleLogicalNoNullsKey). + */ + @Override + protected void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + long[] keyVector = keyLongColVector.vector; + + long currLongKey = keyVector[0]; + int duplicateKeyCount = 1; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + final long nextLongKey = keyVector[batchIndex]; + if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New current key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + // Handle last key. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NULLS key case. + * + * (For remaining comments for handleLogicalNullsKey). + * + */ + @Override + protected void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + long[] keyVector = keyLongColVector.vector; + + boolean[] isNull = keyLongColVector.isNull; + + boolean currKeyIsNull; + long currLongKey; + int duplicateKeyCount; + if (isNull[0]) { + currKeyIsNull = true; + currLongKey = 0; + duplicateKeyCount = 0; // We don't count NULLs for NULL key. + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. 
+ haveNullKey = true; + } else { + currKeyIsNull = false; + currLongKey = keyVector[0]; + duplicateKeyCount = 1; + } + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (isNull[batchIndex]) { + + if (currKeyIsNull) { + + // We don't count NULLs for NULL key. + } else { + + // Current non-NULL key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New NULL key. + currKeyIsNull = true; + duplicateKeyCount = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } + + } else { + final long nextLongKey = keyVector[batchIndex]; + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } else if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current non-NULL key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + } + // Handle last key. + if (!currKeyIsNull) { + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountOperatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountOperatorBase.java new file mode 100644 index 0000000..5f7a168 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountOperatorBase.java @@ -0,0 +1,202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.VectorGroupByHashLongKeyTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; + +/* + * An single long key map optimized for vectorized only group by. + */ +public abstract class VectorGroupByHashLongKeyCountOperatorBase + extends VectorGroupByHashLongKeyTable { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = + VectorGroupByHashLongKeyCountOperatorBase.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + protected transient long nullKeyCount; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyCountOperatorBase() { + super(); + } + + public VectorGroupByHashLongKeyCountOperatorBase(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + haveNullKey = false; + nullKeyCount = 0; + } + + @Override + public void outputGroupByAndClearAll() throws HiveException { + + outputGroupBy(); + if (outputBatch.size > 0) { + forwardOutputBatch(outputBatch); + } + clearHashTable(); + + // No storage to clear! + + haveNullKey = false; + nullKeyCount = 0; + } + + @Override + protected void doBeforeMainLoopWork(final int inputLogicalSize) + throws HiveException, IOException { + checkKeyLimitOncePerBatch(inputLogicalSize); + } + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + LongColumnVector keyLongColVector = (LongColumnVector) batch.cols[longKeyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyLongColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyLongColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyLongColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyLongColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyLongColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. 
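+
+      // Dispatching on noNulls here (as in the selectedInUse case above) keeps each handler's
+      // inner loop free of per-row checks that cannot apply to it.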
+ + if (keyLongColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyLongColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyLongColVector); + } + } + } + + protected abstract void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException; + + protected abstract void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException; + + protected abstract void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException; + + protected abstract void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException; + + protected abstract void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException; + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + LongColumnVector longKeyColumnVector = (LongColumnVector) outputBatch.cols[0]; + + LongColumnVector countKeyColumnVector = (LongColumnVector) outputBatch.cols[1]; + + if (haveNullKey) { + + // NULL entry to deal with. + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + longKeyColumnVector.isNull[nullBatchIndex] = true; + longKeyColumnVector.noNulls = false; + + countKeyColumnVector.isNull[nullBatchIndex] = false; + countKeyColumnVector.vector[nullBatchIndex] = nullKeyCount; + outputBatch.size++; + } + + outputLongKeyAndCountPairs(longKeyColumnVector, countKeyColumnVector); + } + + protected abstract void outputLongKeyAndCountPairs( + LongColumnVector longKeyColumnVector, + LongColumnVector countKeyColumnVector) throws HiveException; +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountStarOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountStarOperator.java new file mode 100644 index 0000000..b1abf9c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountStarOperator.java @@ -0,0 +1,369 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * Specialized class for doing a COUNT(*) Native Vectorized GroupBy that is lookup on a single long + * using a specialized hash map. + * + Count Star + + NULL key has separate counter. + + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + */ +public class VectorGroupByHashLongKeyCountStarOperator + extends VectorGroupByHashLongKeyNonZeroCountTable { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = + VectorGroupByHashLongKeyCountStarOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyCountStarOperator() { + super(); + } + + public VectorGroupByHashLongKeyCountStarOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For all NULL keys case we note NULL key exists AND count it. + */ + @Override + protected void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + if (keyLongColVector.noNulls || !keyLongColVector.isNull[0]) { + final long repeatingKey = keyLongColVector.vector[0]; + findOrCreateLongNonZeroCountKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); + } else { + + // We note we encountered a repeating NULL key. + haveNullKey = true; + nullKeyCount += inputLogicalSize; + } + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * Do find/create on each key with count duplicateKeyCount. 
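+ *
+ * Adjacent equal keys are coalesced locally in duplicateKeyCount, so the hash table sees one
+ * find/create per run of equal keys rather than one per row.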
+ */ + @Override + protected void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + + long currLongKey = keyVector[selected[0]]; + int duplicateKeyCount = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final long nextLongKey = keyVector[selected[logicalIndex]]; + if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New current key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + // Handle last key. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * For all NULL keys we note NULL key exists AND count it duplicateKeyCount. + * + * Do find/create on each non-NULL key with count duplicateKeyCount. + */ + @Override + protected void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + + boolean[] isNull = keyLongColVector.isNull; + + boolean currKeyIsNull; + long currLongKey; + final int firstBatchIndex = selected[0]; + if (isNull[firstBatchIndex]) { + currKeyIsNull = true; + currLongKey = 0; + } else { + currKeyIsNull = false; + currLongKey = keyVector[firstBatchIndex]; + } + int duplicateKeyCount = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (isNull[batchIndex]) { + + if (currKeyIsNull) { + + duplicateKeyCount++; + } else { + + // Current non-NULL key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New NULL key. + currKeyIsNull = true; + duplicateKeyCount = 1; + } + + } else { + final long nextLongKey = keyVector[batchIndex]; + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += duplicateKeyCount; + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } else if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current non-NULL key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + } + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += duplicateKeyCount; + } else { + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NO NULLS key case. + * + * (For remaining comments for handleLogicalNoNullsKey). 
+ */ + @Override + protected void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + long[] keyVector = keyLongColVector.vector; + + long currLongKey = keyVector[0]; + int duplicateKeyCount = 1; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + final long nextLongKey = keyVector[batchIndex]; + if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New current key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + // Handle last key. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NULLS key case. + * + * (For remaining comments for handleLogicalNullsKey). + * + */ + @Override + protected void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + long[] keyVector = keyLongColVector.vector; + + boolean[] isNull = keyLongColVector.isNull; + + boolean currKeyIsNull; + long currLongKey; + if (isNull[0]) { + currKeyIsNull = true; + currLongKey = 0; + } else { + currKeyIsNull = false; + currLongKey = keyVector[0]; + } + int duplicateKeyCount = 1; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (isNull[batchIndex]) { + + if (currKeyIsNull) { + duplicateKeyCount++; + } else { + + // Current non-NULL key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New NULL key. + currKeyIsNull = true; + duplicateKeyCount = 1; + } + + } else { + final long nextLongKey = keyVector[batchIndex]; + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += duplicateKeyCount; + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } else if (currLongKey == nextLongKey) { + + duplicateKeyCount++; + } else { + + // Current non-NULL key ended. + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + + // New non-NULL key. + currLongKey = nextLongKey; + duplicateKeyCount = 1; + } + } + } + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += duplicateKeyCount; + } else { + findOrCreateLongNonZeroCountKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey), + duplicateKeyCount); + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyNonZeroCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyNonZeroCountTable.java new file mode 100644 index 0000000..dc6f471 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyNonZeroCountTable.java @@ -0,0 +1,180 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.Future; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * A single long key hash table optimized for COUNT(*) Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashLongKeyNonZeroCountTable + extends VectorGroupByHashLongKeyCountOperatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = + VectorGroupByHashLongKeyNonZeroCountTable.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyNonZeroCountTable() { + super(); + } + + public VectorGroupByHashLongKeyNonZeroCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + allocateBucketArray(LONG_NON_ZERO_COUNT_ENTRY_SIZE); + } + + //------------------------------------------------------------------------------------------------ + + private static int LONG_NON_ZERO_COUNT_ENTRY_SIZE = 2; + + public void findOrCreateLongNonZeroCountKey(long key, long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int pairIndex = 0; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex + 1] == 0) { + isNewKey = true; + break; + } + if (key == slotMultiples[pairIndex]) { + isNewKey = false; + break; + } + ++metricPutConflict; + // Some other key (collision) - keep probing. 
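+      // The probe step grows by one on every collision, so probes land at slot+1, slot+3,
+      // slot+6, ... (triangular offsets) away from the home bucket.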
+ probeSlot += (++i); + slot = (int)(probeSlot & logicalHashBucketMask); + } + + if (largestNumberOfSteps < i) { + if (LOG.isDebugEnabled()) { + LOG.debug("Probed " + i + " slots (the longest so far) to find space"); + } + largestNumberOfSteps = i; + // debugDumpKeyProbe(keyOffset, keyLength, hashCode, slot); + } + + if (isNewKey) { + slotMultiples[pairIndex] = key; + keyCount++; + slotMultiples[pairIndex + 1] = count; + } else { + slotMultiples[pairIndex + 1] += count; + } + } + + private int nonZeroCountPairIndex; + private long currentNonZeroCount; + + protected int initLongNonZeroCountKeyIterator() { + nonZeroCountPairIndex = 0; + currentNonZeroCount = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNext() { + while (true) { + long count = slotMultiples[nonZeroCountPairIndex + 1]; + if (count > 0) { + currentNonZeroCount = count; + long key = slotMultiples[nonZeroCountPairIndex]; + nonZeroCountPairIndex += 2; + return key; + } + nonZeroCountPairIndex += 2; + } + } + + public long getLongNonZeroCount() { + return currentNonZeroCount; + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one long key non-zero count hash table to the + * output. + */ + @Override + protected void outputLongKeyAndCountPairs( + LongColumnVector longKeyColumnVector, + LongColumnVector countKeyColumnVector) throws HiveException { + + boolean[] keyIsNull = longKeyColumnVector.isNull; + long[] keyVector = longKeyColumnVector.vector; + boolean[] countIsNull = countKeyColumnVector.isNull; + long[] countVector = countKeyColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongNonZeroCountKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNext(); + countVector[i] = getLongNonZeroCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyZeroCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyZeroCountTable.java new file mode 100644 index 0000000..c06d7d0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyZeroCountTable.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.Future; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * A single long key hash table optimized for COUNT(key-column) and COUNT(non-key-column) + * Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashLongKeyZeroCountTable + extends VectorGroupByHashLongKeyCountOperatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = + VectorGroupByHashLongKeyZeroCountTable.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyZeroCountTable() { + super(); + } + + public VectorGroupByHashLongKeyZeroCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + allocateBucketArray(LONG_ZERO_COUNT_ENTRY_SIZE); + } + + //------------------------------------------------------------------------------------------------ + + private static long LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK = 1L << 63; + + private static int LONG_ZERO_COUNT_ENTRY_SIZE = 2; + + public void findOrCreateLongZeroCountKey(long key, long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int pairIndex = 0; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex + 1] == 0) { + isNewKey = true; + break; + } + if (key == slotMultiples[pairIndex]) { + isNewKey = false; + break; + } + ++metricPutConflict; + // Some other key (collision) - keep probing. 
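+      // Same increasing-step probe sequence as findOrCreateLongNonZeroCountKey.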
+ probeSlot += (++i); + slot = (int)(probeSlot & logicalHashBucketMask); + } + + if (largestNumberOfSteps < i) { + // if (isLogDebugEnabled) { + LOG.debug("Probed " + i + " slots (the longest so far) to find space"); + // } + largestNumberOfSteps = i; + // debugDumpKeyProbe(keyOffset, keyLength, hashCode, slot); + } + + if (isNewKey) { + slotMultiples[pairIndex] = key; + keyCount++; + if (count == 0) { + slotMultiples[pairIndex + 1] = LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK; + } else { + slotMultiples[pairIndex + 1] = count; + } + } else if (count > 0) { + + // Only update count when we are leaving 0. + if (slotMultiples[pairIndex + 1] == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) { + slotMultiples[pairIndex + 1] = count; + } else { + slotMultiples[pairIndex + 1] += count; + } + } + } + + private int countKeyPairIndex; + private long currentCountKeyCount; + + protected int initLongZeroCountKeyIterator() { + countKeyPairIndex = 0; + currentCountKeyCount = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNext() { + while (true) { + long count = slotMultiples[countKeyPairIndex + 1]; + if (count != 0) { + if (count == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) { + currentCountKeyCount = 0; + } else { + currentCountKeyCount = count; + } + long key = slotMultiples[countKeyPairIndex]; + countKeyPairIndex += 2; + return key; + } + countKeyPairIndex += 2; + } + } + + public long getLongZeroCount() { + return currentCountKeyCount; + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one long key zero count hash table to the + * output. + */ + @Override + protected void outputLongKeyAndCountPairs( + LongColumnVector longKeyColumnVector, + LongColumnVector countKeyColumnVector) throws HiveException { + + boolean[] keyIsNull = longKeyColumnVector.isNull; + long[] keyVector = longKeyColumnVector.vector; + boolean[] countIsNull = countKeyColumnVector.isNull; + long[] countVector = countKeyColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongZeroCountKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNext(); + countVector[i] = getLongZeroCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/dupred/VectorGroupByHashLongKeyDuplicateReductionOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/dupred/VectorGroupByHashLongKeyDuplicateReductionOperator.java new file mode 100644 index 0000000..cd2e474 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/dupred/VectorGroupByHashLongKeyDuplicateReductionOperator.java @@ -0,0 +1,524 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.dupred; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; +import org.apache.hive.common.util.HashCodeUtil; + +/* + * Specialized class for doing a Native Vectorized GroupBy with no aggregation. + * + * It is used on one long key for duplicate key reduction. + * + * Final duplicate elimination must be done in reduce-shuffle and a reducer since with hash table + * overflow some duplicates can slip through. And, of course, other vertices may contribute + * the same keys. + */ +public class VectorGroupByHashLongKeyDuplicateReductionOperator + extends VectorGroupByHashLongKeyDuplicateReductionTable { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = + VectorGroupByHashLongKeyDuplicateReductionOperator.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + protected transient boolean haveZeroKey; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByHashLongKeyDuplicateReductionOperator() { + super(); + } + + public VectorGroupByHashLongKeyDuplicateReductionOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + this.vContext = vContext; + this.vectorDesc = (VectorGroupByDesc) vectorDesc; + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + haveNullKey = false; + haveZeroKey = false; + } + + @Override + public void outputGroupByAndClearAll() throws HiveException { + + outputGroupBy(); + if (outputBatch.size > 0) { + forwardOutputBatch(outputBatch); + } + clearHashTable(); + + // No storage to clear! + + haveNullKey = false; + haveZeroKey = false; + } + + @Override + protected void doBeforeMainLoopWork(final int inputLogicalSize) + throws HiveException, IOException { + + /* + * If the hash table has less than the worst-case inputLogicalSize keys that + * could be added, then flush the current hash table entries and clear it. + */ + checkKeyLimitOncePerBatch(inputLogicalSize); + } + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + LongColumnVector keyLongColVector = (LongColumnVector) batch.cols[longKeyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyLongColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyLongColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyLongColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyLongColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyLongColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyLongColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyLongColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyLongColVector); + } + } + } + + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For the all NULL or all 0 keys case we note NULL/0 key exists. Otherwise, we do the + * find/create. + */ + protected void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + if (keyLongColVector.noNulls || !keyLongColVector.isNull[0]) { + final long repeatingKey = keyLongColVector.vector[0]; + if (repeatingKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey)); + } + } else { + + // We note we encountered a repeating NULL key. + haveNullKey = true; + } + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * Do find/create on each key with count duplicateKeyCount. 
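+ *
+ * Note: this operator keeps no counts -- each distinct non-zero key just needs a slot entry.
+ * A key value of 0 cannot be stored because 0 marks an empty slot, so it is tracked with the
+ * haveZeroKey flag instead.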
+ */ + protected void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + + long currLongKey = keyVector[selected[0]]; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final long nextLongKey = keyVector[selected[logicalIndex]]; + if (currLongKey == nextLongKey) { + + // Equal key series. + } else { + + // Current key ended. + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + + // New current key. + currLongKey = nextLongKey; + } + } + // Handle last key. + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * For all NULL keys cases we note NULL key exists but leave its count as 0. + * + * Do find/create on each non-NULL key with count duplicateKeyCount. + */ + protected void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + + long[] keyVector = keyLongColVector.vector; + + boolean[] isNull = keyLongColVector.isNull; + + boolean currKeyIsNull; + long currLongKey; + final int firstBatchIndex = selected[0]; + if (isNull[firstBatchIndex]) { + currKeyIsNull = true; + currLongKey = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } else { + currKeyIsNull = false; + currLongKey = keyVector[firstBatchIndex]; + } + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (isNull[batchIndex]) { + + if (currKeyIsNull) { + + // NULL key series. + } else { + + // Current non-NULL key ended by NULL key. + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + + // New NULL key. + currKeyIsNull = true; + + // We note we encountered a NULL key. + haveNullKey = true; + } + + } else { + final long nextLongKey = keyVector[selected[logicalIndex]]; + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // New non-NULL key. + currLongKey = nextLongKey; + } else if (currLongKey == nextLongKey) { + + // Equal key series. + } else { + + // Current non-NULL key ended by another non-NULL key. + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + + // New non-NULL key. + currLongKey = nextLongKey; + } + } + } + // Handle last key. + if (!currKeyIsNull) { + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + } + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NO NULLS key case. + * + * (For remaining comments for handleLogicalNoNullsKey). 
+ */ + protected void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + long[] keyVector = keyLongColVector.vector; + + long currLongKey = keyVector[0]; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + final long nextLongKey = keyVector[batchIndex]; + if (currLongKey == nextLongKey) { + + // Equal key series. + } else { + + // Current key ended. + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + + // New current key. + currLongKey = nextLongKey; + } + } + // Handle last key. + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NULLS key case. + * + * (For remaining comments for handleLogicalNullsKey). + * + */ + protected void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + LongColumnVector keyLongColVector) throws HiveException, IOException { + + long[] keyVector = keyLongColVector.vector; + + boolean[] isNull = keyLongColVector.isNull; + + boolean currKeyIsNull; + long currLongKey; + if (isNull[0]) { + currKeyIsNull = true; + currLongKey = 0; + + // We note we encountered a NULL key. + haveNullKey = true; + } else { + currKeyIsNull = false; + currLongKey = keyVector[0]; + } + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (isNull[batchIndex]) { + + if (currKeyIsNull) { + + // NULL key series. + } else { + + // Current non-NULL key ended by NULL key. + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + + // New NULL key. + currKeyIsNull = true; + + // We note we encountered a NULL key. + haveNullKey = true; + } + + } else { + final long nextLongKey = keyVector[batchIndex]; + if (currKeyIsNull) { + + // Current NULL key ended by non-NULL key. + currKeyIsNull = false; + + // New non-NULL key. + currLongKey = nextLongKey; + } else if (currLongKey == nextLongKey) { + + // Equal key series. + } else { + + // Current non-NULL key ended by non-NULL key. + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + + // New non-NULL key. + currLongKey = nextLongKey; + } + } + } + // Handle last key. + if (!currKeyIsNull) { + if (currLongKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currLongKey, + HashCodeUtil.calculateLongHashCode(currLongKey)); + } + } + } + + /** + * Flush all of the keys of the one long key hash table to the output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + LongColumnVector longKeyColumnVector = (LongColumnVector) outputBatch.cols[0]; + + boolean[] keyIsNull = longKeyColumnVector.isNull; + + if (haveNullKey) { + + // NULL entry to deal with. + + // Is the outputBatch already full? 
+ if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + keyIsNull[outputBatch.size++] = true; + longKeyColumnVector.noNulls = false; + } + + long[] keyVector = longKeyColumnVector.vector; + + if (haveZeroKey) { + + // Zero key to deal with. + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + keyVector[outputBatch.size++] = 0; + } + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongDuplicateReductionKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNext(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/dupred/VectorGroupByHashLongKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/dupred/VectorGroupByHashLongKeyDuplicateReductionTable.java new file mode 100644 index 0000000..2c74231 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/dupred/VectorGroupByHashLongKeyDuplicateReductionTable.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
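outputGroupBy above drains the table into a reusable output batch: the NULL and zero keys noted in haveNullKey/haveZeroKey are emitted first, then the slot iterator copies at most DEFAULT_SIZE - size keys per pass and the batch is forwarded whenever it fills. A stripped-down sketch of that chunked drain, assuming a plain long[] of distinct keys and a hypothetical forward callback in place of forwardOutputBatch and VectorizedRowBatch.

import java.util.Arrays;
import java.util.function.Consumer;

public class ChunkedDrainSketch {

  static final int DEFAULT_SIZE = 1024;        // VectorizedRowBatch.DEFAULT_SIZE in Hive

  /** Copy distinctKeys into fixed-size chunks, flushing each chunk as it fills. */
  static void drain(long[] distinctKeys, Consumer<long[]> forward) {
    long[] batch = new long[DEFAULT_SIZE];
    int batchSize = 0;
    int remaining = distinctKeys.length;
    int next = 0;
    while (remaining > 0) {
      if (batchSize == DEFAULT_SIZE) {         // batch full: forward and reuse it
        forward.accept(Arrays.copyOf(batch, batchSize));
        batchSize = 0;
      }
      int count = Math.min(remaining, DEFAULT_SIZE - batchSize);
      for (int i = 0; i < count; i++) {
        batch[batchSize + i] = distinctKeys[next++];
      }
      batchSize += count;
      remaining -= count;
    }
    if (batchSize > 0) {
      // The real operator reuses one VectorizedRowBatch and forwards the final
      // partial batch at close; copying here just keeps the sketch side-effect free.
      forward.accept(Arrays.copyOf(batch, batchSize));
    }
  }

  public static void main(String[] args) {
    long[] keys = new long[2500];
    for (int i = 0; i < keys.length; i++) { keys[i] = i; }
    drain(keys, chunk -> System.out.println("forwarded " + chunk.length + " keys"));
    // forwarded 1024 keys / 1024 keys / 452 keys
  }
}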
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.dupred; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.Future; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashTable; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.VectorGroupByHashLongKeyTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * A single long key hash table optimized for Duplicate Reduction Native Vectorized GroupBy */ +public abstract class VectorGroupByHashLongKeyDuplicateReductionTable + extends VectorGroupByHashLongKeyTable { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = + VectorGroupByHashLongKeyDuplicateReductionTable.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyDuplicateReductionTable() { + super(); + } + + public VectorGroupByHashLongKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + allocateBucketArray(LONG_DUPLICATE_REDUCTION_ENTRY_SIZE); + } + + //------------------------------------------------------------------------------------------------ + + private static int LONG_DUPLICATE_REDUCTION_ENTRY_SIZE = 1; + + public void findOrCreateLongDuplicateReductionKey(long key, long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + while (true) { + if (slotMultiples[slot] == 0) { + break; + } + if (key == slotMultiples[slot]) { + // Found it! A duplicate has now been eliminated. + return; + } + ++metricPutConflict; + // Some other key (collision) - keep probing. + probeSlot += (++i); + slot = (int)(probeSlot & logicalHashBucketMask); + } + + if (largestNumberOfSteps < i) { + if (LOG.isDebugEnabled()) { + LOG.debug("Probed " + i + " slots (the longest so far) to find space"); + } + largestNumberOfSteps = i; + } + + // Create first-time key. + slotMultiples[slot] = key; + keyCount++; + } + + private int countKeyIndex; + + protected int initLongDuplicateReductionKeyIterator() { + countKeyIndex = 0; + return keyCount; + } + + // Find next key and return it. 
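findOrCreateLongDuplicateReductionKey above is open addressing over a power-of-two slot array: a slot value of 0 means empty (which is why a genuine 0 key is tracked by haveZeroKey instead of being stored), hitting an equal key silently drops the duplicate, and a collision advances by a growing step (probeSlot += ++i, i.e. triangular probing). A self-contained sketch of the same probe sequence; the class, its names and the bit-mix hash are illustrative stand-ins, not Hive's HashCodeUtil.

public class LongDupReductionTableSketch {

  private final long[] slots;          // 0 == empty slot, otherwise the key itself
  private final int mask;              // logicalHashBucketMask: table size is a power of two
  private int keyCount;

  LongDupReductionTableSketch(int powerOfTwoSize) {
    slots = new long[powerOfTwoSize];
    mask = powerOfTwoSize - 1;
  }

  /**
   * Insert key unless it is already present; returns true if it was new.
   * Precondition: key != 0 (a real 0 key is handled by a flag outside the table,
   * exactly as the operator does with haveZeroKey).
   */
  boolean findOrCreate(long key) {
    int slot = hash(key) & mask;
    long probeSlot = slot;
    int i = 0;
    while (slots[slot] != 0) {
      if (slots[slot] == key) {
        return false;                  // duplicate eliminated
      }
      probeSlot += (++i);              // triangular probe step: +1, +2, +3, ...
      slot = (int) (probeSlot & mask);
    }
    slots[slot] = key;                 // first-time key
    keyCount++;
    return true;
  }

  private static int hash(long key) {
    // Any decent 64-to-32 bit mix works for the sketch; Hive uses HashCodeUtil instead.
    long h = key * 0x9E3779B97F4A7C15L;
    return (int) (h ^ (h >>> 32));
  }

  public static void main(String[] args) {
    LongDupReductionTableSketch t = new LongDupReductionTableSketch(16);
    System.out.println(t.findOrCreate(42));   // true: created
    System.out.println(t.findOrCreate(42));   // false: duplicate dropped
  }
}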
+ protected long getNext() { + while (true) { + long key = slotMultiples[countKeyIndex++]; + if (key != 0) { + return key; + } + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index d3fbf07..77b7fc33 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -93,6 +93,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count.VectorGroupByHashLongKeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count.VectorGroupByHashLongKeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count.VectorGroupByHashLongKeyCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.dupred.VectorGroupByHashLongKeyDuplicateReductionOperator; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -128,9 +132,13 @@ import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc; import org.apache.hadoop.hive.ql.plan.VectorPTFInfo; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation.SingleCountAggregationKind; import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; import org.apache.hadoop.hive.ql.plan.VectorizationCondition; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; @@ -224,6 +232,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.mapred.InputFormat; @@ -303,6 +312,12 @@ private VectorizationEnabledOverride vectorizationEnabledOverride; boolean isTestForcedVectorizationEnable; + boolean isVectorizationGroupByNativeEnabled; + private VectorizationEnabledOverride vectorizationGroupByNativeEnabledOverride; + boolean isTestForcedVectorizationGroupByNativeEnable; + boolean weCanAttemptGroupByNativeVectorization; + int testGroupByMaxMemoryAvailable; + private boolean useVectorizedInputFileFormat; private boolean useVectorDeserialize; private boolean useRowDeserialize; @@ -2220,6 +2235,44 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE return physicalContext; } + // Native Vector GROUP BY. 
+ isVectorizationGroupByNativeEnabled = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED); + + final String testVectorizationGroupByNativeOverrideString = + HiveConf.getVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE); + vectorizationGroupByNativeEnabledOverride = + VectorizationEnabledOverride.nameMap.get(testVectorizationGroupByNativeOverrideString); + + isTestForcedVectorizationGroupByNativeEnable = false; + switch (vectorizationGroupByNativeEnabledOverride) { + case NONE: + weCanAttemptGroupByNativeVectorization = isVectorizationGroupByNativeEnabled; + break; + case DISABLE: + weCanAttemptGroupByNativeVectorization = false; + break; + case ENABLE: + weCanAttemptGroupByNativeVectorization = true; + isTestForcedVectorizationGroupByNativeEnable = !isVectorizationGroupByNativeEnabled; + + // Different parts of the code rely on this being set... + HiveConf.setBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED, true); + isVectorizationGroupByNativeEnabled = true; + break; + default: + throw new RuntimeException("Unexpected vectorization enabled override " + + vectorizationGroupByNativeEnabledOverride); + } + + testGroupByMaxMemoryAvailable = + HiveConf.getIntVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE); + + // Input Format control. useVectorizedInputFileFormat = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT); @@ -3662,6 +3715,219 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi return result; } + private Operator specializeGroupByOperator( + Operator op, VectorizationContext vContext, + GroupByDesc desc, VectorGroupByDesc vectorDesc) + throws HiveException { + + VectorGroupByInfo vectorGroupByInfo = vectorDesc.getVectorGroupByInfo(); + + Operator vectorOp = null; + Class> opClass = null; + + AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + switch (aggregationVariation) { + case HASH_DUPLICATE_REDUCTION: + opClass = VectorGroupByHashLongKeyDuplicateReductionOperator.class; + break; + + case HASH_SINGLE_COUNT: + { + SingleCountAggregationKind SingleCountAggregationKind = + vectorGroupByInfo.getSingleCountAggregation().getSingleCountAggregationKind(); + + switch (SingleCountAggregationKind) { + case COUNT_STAR: + opClass = VectorGroupByHashLongKeyCountStarOperator.class; + break; + case COUNT_KEY: + opClass = VectorGroupByHashLongKeyCountKeyOperator.class; + break; + case COUNT_COLUMN: + opClass = VectorGroupByHashLongKeyCountColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected single count aggregation kind " + SingleCountAggregationKind); + } + } + break; + + default: + throw new RuntimeException("Unexpected aggregation variation " + aggregationVariation); + } + + vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + vectorDesc.setIsNative(true); + + vectorOp = OperatorFactory.getVectorOperator( + opClass, op.getCompilationOpContext(), desc, vContext, vectorDesc); + LOG.info("Vectorizer vectorizeOperator group by class " + vectorOp.getClass().getSimpleName()); + + return vectorOp; + } + + private boolean determineGroupByKeyInfo( + List keysDescs, + VectorExpression[] allKeyExpressions, + int[] groupByKeyColumnMap, + TypeInfo[] groupByKeyTypeInfos, + ColumnVector.Type[] groupByKeyColumnVectorTypes, + List groupByKeyExpressionsList) throws HiveException { + ColumnVector.Type columnVectorType; + 
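Near the top of this Vectorizer hunk, the tri-state test override (none/enable/disable) and the hive.vectorized.execution.groupby.native.enabled flag are collapsed into a single weCanAttemptGroupByNativeVectorization decision, with enable also force-setting the flag in the conf. The decision table as a stand-alone sketch, with illustrative names rather than the Vectorizer's fields or the HiveConf API.

public class NativeGroupByOverrideSketch {

  enum TestOverride { NONE, ENABLE, DISABLE }

  /** NONE defers to the configured flag; ENABLE and DISABLE win unconditionally. */
  static boolean canAttemptNativeGroupBy(boolean configuredEnabled, TestOverride override) {
    switch (override) {
      case NONE:    return configuredEnabled;
      case DISABLE: return false;
      case ENABLE:  return true;       // the real code also rewrites the conf value to true here
      default:      throw new IllegalStateException("Unexpected override " + override);
    }
  }

  public static void main(String[] args) {
    System.out.println(canAttemptNativeGroupBy(false, TestOverride.NONE));    // false
    System.out.println(canAttemptNativeGroupBy(false, TestOverride.ENABLE));  // true
    System.out.println(canAttemptNativeGroupBy(true,  TestOverride.DISABLE)); // false
  }
}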
boolean keysAreAllFixedLength = true; // Assume. + for (int i = 0; i < groupByKeyColumnMap.length; i++) { + VectorExpression ve = allKeyExpressions[i]; + groupByKeyColumnMap[i] = ve.getOutputColumnNum(); + groupByKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo(); + columnVectorType = + VectorizationContext.getColumnVectorTypeFromTypeInfo(groupByKeyTypeInfos[i]); + + if (columnVectorType == ColumnVector.Type.BYTES) { + keysAreAllFixedLength = false; + } + + groupByKeyColumnVectorTypes[i] = columnVectorType; + if (!IdentityExpression.isColumnOnly(ve)) { + groupByKeyExpressionsList.add(ve); + } + } + return keysAreAllFixedLength; + } + + private boolean canSpecializeGroupBy(GroupByDesc desc, VectorGroupByDesc vectorDesc, + boolean isTezOrSpark, VectorizationContext vContext) throws HiveException { + + String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); + + VectorGroupByInfo vectorGroupByInfo = new VectorGroupByInfo(); + + List vectorizationIssueList = new ArrayList(); + + List keyDescs = desc.getKeys(); + final boolean isEmptyKey = keyDescs.isEmpty(); + final int outputKeyLength = keyDescs.size(); + + GroupByDesc.Mode groupByMode = desc.getMode(); + ProcessingMode processingMode = vectorDesc.getProcessingMode(); + + VectorExpression[] vecKeyExprs = vectorDesc.getKeyExpressions(); + final int vecKeyExprSize = vecKeyExprs.length; + + VectorAggregationDesc[] vecAggrDescs = vectorDesc.getVecAggrDescs(); + final int vecAggrDescSize = (vecAggrDescs == null ? 0 : vecAggrDescs.length); + + List aggrDescList = desc.getAggregators(); + + boolean isHash = (groupByMode == GroupByDesc.Mode.HASH); + final AggregationVariation aggregationVariation; + + SingleCountAggregation singleCountAggregation = null; + + if (isHash && vecAggrDescSize == 0) { + + // No aggregations just means the key is being grouped. We are getting rid of duplicate keys. + + aggregationVariation = AggregationVariation.HASH_DUPLICATE_REDUCTION; + singleCountAggregation = null; + + } else if (isHash && vecKeyExprSize == 1 && vecAggrDescSize == 1 && + aggrDescList.get(0).getGenericUDAFName().equalsIgnoreCase("count")) { + + // Single COUNT aggregation specialization. Store key and count in hash table without a + // hash element. 
+ + AggregationDesc countAggrDesc = aggrDescList.get(0); + List countParamList = countAggrDesc.getParameters(); + final int countParamSize = countParamList.size(); + if (countParamSize == 0) { + + // COUNT(*) + + aggregationVariation = AggregationVariation.HASH_SINGLE_COUNT; + singleCountAggregation = + new SingleCountAggregation(SingleCountAggregationKind.COUNT_STAR); + + } else if (countParamSize == 1) { + + aggregationVariation = AggregationVariation.HASH_SINGLE_COUNT; + + VectorAggregationDesc countVecAggrDesc = vecAggrDescs[0]; + + final int inputColumnNum = countVecAggrDesc.getInputExpression().getOutputColumnNum(); + + boolean isKey = false; + for (VectorExpression vecKeyExpr : vecKeyExprs) { + if (vecKeyExpr.getOutputColumnNum() == inputColumnNum) { + isKey = true; + break; + } + } + if (isKey) { + singleCountAggregation = + new SingleCountAggregation(SingleCountAggregationKind.COUNT_KEY); + } else { + singleCountAggregation = + new SingleCountAggregation(SingleCountAggregationKind.COUNT_COLUMN, inputColumnNum); + } + } else { + + aggregationVariation = AggregationVariation.NONE; + + vectorizationIssueList.add( + "Cannot specialize aggregation function " + countAggrDesc.getGenericUDAFName() + + " that has more than 1 input parameter"); + } + + } else { + + // FUTURE: More aggregations. + aggregationVariation = AggregationVariation.NONE; + } + + // TEMPORARY: Restriction + boolean isOneLongKey = + (vecKeyExprSize == 1 && vecKeyExprs[0].getOutputColumnVectorType() == Type.LONG); + + final VectorGroupByInfo.HashTableKeyType hashTableKeyType; + if (isOneLongKey) { + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.LONG; + } else { + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.NONE; + } + + vectorGroupByInfo.setIsVectorizationGroupByNativeEnabled( + weCanAttemptGroupByNativeVectorization); + vectorGroupByInfo.setEngine(engine); + + // Temporary restrictions... + vectorGroupByInfo.setIsOneLongKey(isOneLongKey); + + vectorGroupByInfo.setVectorizationIssueList(vectorizationIssueList); + + vectorGroupByInfo.setAggregationVariation(aggregationVariation); + vectorGroupByInfo.setSingleCountAggregation(singleCountAggregation); + + vectorGroupByInfo.setHashTableKeyType(hashTableKeyType); + + vectorGroupByInfo.setTestGroupByMaxMemoryAvailable(testGroupByMaxMemoryAvailable); + + // So EXPLAIN VECTORIZATION can show native conditions, etc. 
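canSpecializeGroupBy classifies a lone COUNT by its argument: no parameter is COUNT(*), a parameter that is also the single grouping key is COUNT(key), and anything else is COUNT(column) carrying the input column number; the three kinds later map to VectorGroupByHashLongKeyCountStarOperator, VectorGroupByHashLongKeyCountKeyOperator and VectorGroupByHashLongKeyCountColumnOperator in specializeGroupByOperator. A small sketch of that classification with the plan objects reduced to plain ints (names are illustrative).

public class SingleCountClassifierSketch {

  enum CountKind { COUNT_STAR, COUNT_KEY, COUNT_COLUMN }

  /**
   * paramColumn is the aggregation's input column, or -1 for COUNT(*);
   * keyColumn is the single GROUP BY key column.
   */
  static CountKind classify(int paramColumn, int keyColumn) {
    if (paramColumn < 0) {
      return CountKind.COUNT_STAR;     // count() with no argument
    }
    if (paramColumn == keyColumn) {
      return CountKind.COUNT_KEY;      // count(a) ... group by a
    }
    return CountKind.COUNT_COLUMN;     // count(b) ... group by a
  }

  public static void main(String[] args) {
    System.out.println(classify(-1, 0));  // COUNT_STAR
    System.out.println(classify(0, 0));   // COUNT_KEY
    System.out.println(classify(1, 0));   // COUNT_COLUMN
  }
}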
+ vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + if (!weCanAttemptGroupByNativeVectorization || + !isTezOrSpark || + !isOneLongKey || + (aggregationVariation == AggregationVariation.NONE) || + groupByMode != GroupByDesc.Mode.HASH || + vectorizationIssueList.size() > 0) { + return false; + } + + return true; + } + private Operator specializeReduceSinkOperator( Operator op, VectorizationContext vContext, ReduceSinkDesc desc, VectorReduceSinkDesc vectorDesc) throws HiveException { @@ -4232,16 +4498,30 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { Operator groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { - ImmutablePair,String> pair = + String issue = + doVectorizeGroupByOperatorPreparation( + groupByOp, vContext, vectorGroupByDesc); + Preconditions.checkState(issue == null); + return doVectorizeGroupByOperator( groupByOp, vContext, vectorGroupByDesc); - return pair.left; + } + + private static Operator doVectorizeGroupByOperator( + Operator groupByOp, VectorizationContext vContext, + VectorGroupByDesc vectorGroupByDesc) + throws HiveException { + Operator vectorOp = + OperatorFactory.getVectorOperator( + groupByOp.getCompilationOpContext(), (GroupByDesc) groupByOp.getConf(), + vContext, vectorGroupByDesc); + return vectorOp; } /* * NOTE: The VectorGroupByDesc has already been allocated and will be updated here. */ - private static ImmutablePair,String> doVectorizeGroupByOperator( + private static String doVectorizeGroupByOperatorPreparation( Operator groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { @@ -4263,7 +4543,7 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { ImmutablePair pair = getVectorAggregationDesc(aggDesc, vContext); if (pair.left == null) { - return new ImmutablePair, String>(null, pair.right); + return pair.right; } vecAggrDescs[i] = pair.left; @@ -4274,14 +4554,9 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { vectorGroupByDesc.setKeyExpressions(vecKeyExpressions); vectorGroupByDesc.setVecAggrDescs(vecAggrDescs); vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns); - Operator vectorOp = - OperatorFactory.getVectorOperator( - groupByOp.getCompilationOpContext(), groupByDesc, - vContext, vectorGroupByDesc); - return new ImmutablePair, String>(vectorOp, null); - } - static int fake; + return null; // No issue. 
+ } public static Operator vectorizeSelectOperator( Operator selectOp, VectorizationContext vContext, @@ -4830,23 +5105,40 @@ private static VectorPTFInfo createVectorPTFInfo(Operator,String> pair = - doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); - if (pair.left == null) { - setOperatorIssue(pair.right); + String issue = + doVectorizeGroupByOperatorPreparation(op, vContext, vectorGroupByDesc); + if (issue != null) { + setOperatorIssue(issue); throw new VectorizerCannotVectorizeException(); } - vectorOp = pair.left; - isNative = false; + + GroupByDesc groupByDesc = (GroupByDesc) op.getConf(); + boolean specialize = + canSpecializeGroupBy(groupByDesc, vectorGroupByDesc, isTezOrSpark, vContext); + + if (!specialize) { + + vectorOp = + doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); + isNative = false; + + } else { + + vectorOp = + specializeGroupByOperator(op, vContext, groupByDesc, vectorGroupByDesc); + isNative = true; + } if (vectorTaskColumnInfo != null) { VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions(); if (usesVectorUDFAdaptor(vecKeyExpressions)) { vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); - for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { - if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { - vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + if (vecAggrDescs != null) { + for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { + if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 31237c8..9c4c6c5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -20,8 +20,10 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; @@ -31,7 +33,10 @@ import org.apache.hive.common.util.AnnotationUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; - +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation.SingleCountAggregationKind; /** * GroupByDesc. @@ -324,26 +329,38 @@ public Object clone() { this.groupingSetPosition, this.isDistinct); } + // Use LinkedHashSet to give predictable display order. + private static final Set vectorizableGroupByNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { private final GroupByDesc groupByDesc; private final VectorGroupByDesc vectorGroupByDesc; + private final VectorGroupByInfo vectorGroupByInfo; + + private VectorizationCondition[] nativeConditions; public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorGroupByDesc vectorGroupByDesc) { - // Native vectorization not supported. 
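The vectorizeOperator hunk above splits the old doVectorizeGroupByOperator into a preparation step that returns an issue string (null meaning no issue) and a construction step, so the same prepared VectorGroupByDesc can feed either the specialized native operator or the existing VectorGroupByOperator fallback. The resulting control flow, reduced to a sketch with placeholder types.

public class GroupByVectorizeDispatchSketch {

  interface Op {}
  static final class GenericVectorGroupByOp implements Op {}
  static final class NativeLongKeyGroupByOp implements Op {}

  /** Returns an issue description, or null when the group by can be vectorized at all. */
  static String prepare(boolean aggregationsSupported) {
    return aggregationsSupported ? null : "Aggregation not supported";
  }

  static Op vectorize(boolean aggregationsSupported, boolean canSpecialize) {
    String issue = prepare(aggregationsSupported);
    if (issue != null) {
      throw new IllegalStateException(issue);     // stands in for VectorizerCannotVectorizeException
    }
    // Fall back to the non-specialized vectorized operator unless every native condition holds.
    return canSpecialize ? new NativeLongKeyGroupByOp() : new GenericVectorGroupByOp();
  }

  public static void main(String[] args) {
    System.out.println(vectorize(true, true).getClass().getSimpleName());   // NativeLongKeyGroupByOp
    System.out.println(vectorize(true, false).getClass().getSimpleName());  // GenericVectorGroupByOp
  }
}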
- super(vectorGroupByDesc, false); + super(vectorGroupByDesc, vectorGroupByDesc.isNative()); this.groupByDesc = groupByDesc; this.vectorGroupByDesc = vectorGroupByDesc; + vectorGroupByInfo = vectorGroupByDesc.getVectorGroupByInfo(); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getKeysExpression() { return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions()); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getAggregators() { + if (isNative) { + return null; + } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); List vecAggrList = new ArrayList(vecAggrDescs.length); for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { @@ -352,17 +369,20 @@ public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, return vecAggrList; } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProcessingMode() { return vectorGroupByDesc.getProcessingMode().name(); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getGroupByMode() { return groupByDesc.getMode().name(); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getVectorOutputConditionsNotMet() { List results = new ArrayList(); @@ -379,13 +399,109 @@ public String getGroupByMode() { return results; } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProjectedOutputColumnNums() { return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns()); } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorGroupByInfo.getIsVectorizationGroupByNativeEnabled(); + + String engine = vectorGroupByInfo.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableGroupByNativeEngines; + boolean engineInSupported = vectorizableGroupByNativeEngines.contains(engine); + + final List vectorizationIssueList = vectorGroupByInfo.getVectorizationIssueList(); + + List conditionList = new ArrayList(); + conditionList.add( + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED.varname)); + conditionList.add( + new 
VectorizationCondition( + engineInSupported, + engineInSupportedCondName)); + conditionList.add( + new VectorizationCondition( + vectorGroupByInfo.getIsOneLongKey(), + "One Long Key")); + AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + conditionList.add( + new VectorizationCondition( + (aggregationVariation == AggregationVariation.HASH_SINGLE_COUNT || + aggregationVariation == AggregationVariation.HASH_DUPLICATE_REDUCTION), + "Single COUNT aggregation or Duplicate Reduction")); + conditionList.add( + new VectorizationCondition( + (vectorGroupByDesc.getProcessingMode() == ProcessingMode.HASH), + "Group By Mode HASH")); + if (vectorizationIssueList.size() != 0) { + conditionList.add( + new VectorizationCondition( + true, + "Has issues \"" + + vectorizationIssueList.toString() + "\"")); + } + + VectorizationCondition[] conditions = + conditionList.toArray(new VectorizationCondition[0]); + + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences. + if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) { + return null; + } + + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences. 
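createNativeConditions builds one VectorizationCondition per requirement, and EXPLAIN VECTORIZATION prints them partitioned into nativeConditionsMet / nativeConditionsNotMet, which is exactly what the q.out diffs further down show. The partitioning is a simple filter over (flag, description) pairs; a sketch using a local Condition class, not the real VectorizationCondition.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class NativeConditionsSketch {

  static class Condition {
    final boolean met;
    final String name;
    Condition(boolean met, String name) { this.met = met; this.name = name; }
  }

  /** Render "<name> IS true|false" for every condition whose flag matches wantMet. */
  static List<String> format(List<Condition> conditions, boolean wantMet) {
    List<String> out = new ArrayList<>();
    for (Condition c : conditions) {
      if (c.met == wantMet) {
        out.add(c.name + " IS " + c.met);
      }
    }
    return out;
  }

  public static void main(String[] args) {
    List<Condition> conditions = Arrays.asList(
        new Condition(true, "hive.vectorized.execution.groupby.native.enabled"),
        new Condition(true, "hive.execution.engine tez IN [tez, spark]"),
        new Condition(false, "One Long Key"),
        new Condition(true, "Single COUNT aggregation or Duplicate Reduction"),
        new Condition(true, "Group By Mode HASH"));
    System.out.println("nativeConditionsMet: " + format(conditions, true));
    System.out.println("nativeConditionsNotMet: " + format(conditions, false));
  }
}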
+ if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) { + return null; + } + + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "singleCountAggreation", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getSingleCountAggreation() { + if (!isNative) { + return null; + } + final SingleCountAggregationKind singleCountAggregationKind = + vectorGroupByInfo.getSingleCountAggregation().getSingleCountAggregationKind(); + if (singleCountAggregationKind == SingleCountAggregationKind.NONE) { + return null; + } + return singleCountAggregationKind.name(); + } } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public GroupByOperatorExplainVectorization getGroupByVectorization() { VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) getVectorDesc(); if (vectorGroupByDesc == null) { @@ -404,11 +520,14 @@ public static String getComplexTypeEnabledCondition( public static String getComplexTypeWithGroupByEnabledCondition( boolean isVectorizationComplexTypesEnabled, boolean isVectorizationGroupByComplexTypesEnabled) { - final boolean enabled = (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled); + final boolean enabled = + (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled); return "(" + - HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationComplexTypesEnabled + + HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " + + isVectorizationComplexTypesEnabled + " AND " + - HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationGroupByComplexTypesEnabled + + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " + + isVectorizationGroupByComplexTypesEnabled + ") IS " + enabled; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index caf0c67..b7e60f7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -67,8 +67,12 @@ private boolean isVectorizationComplexTypesEnabled; private boolean isVectorizationGroupByComplexTypesEnabled; + private boolean isNative; + private VectorGroupByInfo vectorGroupByInfo; + public VectorGroupByDesc() { - this.processingMode = ProcessingMode.NONE; + processingMode = ProcessingMode.NONE; + isNative = false; } public void setProcessingMode(ProcessingMode processingMode) { @@ -78,6 +82,14 @@ public ProcessingMode getProcessingMode() { return processingMode; } + public void setIsNative(boolean isNative) { + this.isNative = isNative; + } + + public boolean isNative() { + return isNative; + } + public void setKeyExpressions(VectorExpression[] keyExpressions) { this.keyExpressions = keyExpressions; } @@ -118,6 +130,14 @@ public boolean getIsVectorizationGroupByComplexTypesEnabled() { return isVectorizationGroupByComplexTypesEnabled; } + public void setVectorGroupByInfo(VectorGroupByInfo vectorGroupByInfo) { + this.vectorGroupByInfo = vectorGroupByInfo; + } + + public 
VectorGroupByInfo getVectorGroupByInfo() { + return vectorGroupByInfo; + } + /** * Which ProcessingMode for VectorGroupByOperator? * diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java new file mode 100644 index 0000000..6b4db74 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; + +/** + * VectorGroupByInfo. + * + * A convenience data structure that has information needed to vectorize group by. + * + * It is created by the Vectorizer when it is determining whether it can specialize, so the + * information doesn't have to be recreated again and again by the VectorGroupByOperator's + * constructors and later during execution. 
+ */ +public class VectorGroupByInfo { + + private static long serialVersionUID = 1L; + + public static enum HashTableKeyType { + NONE, + LONG + } + + //------------------------------------------------------------------------------------------------ + + public static enum AggregationVariation { + NONE, + HASH_SINGLE_COUNT, + HASH_DUPLICATE_REDUCTION + } + + public static class SingleCountAggregation { + + public enum SingleCountAggregationKind { + NONE, + COUNT_STAR, + COUNT_KEY, + COUNT_COLUMN + } + + private final SingleCountAggregationKind singleCountAggregationKind; + private final int countColumnNum; + + public SingleCountAggregation(SingleCountAggregationKind singleCountAggregationKind) { + this.singleCountAggregationKind = singleCountAggregationKind; + countColumnNum = -1; + } + + public SingleCountAggregation(SingleCountAggregationKind singleCountAggregationKind, + int countColumnNum) { + this.singleCountAggregationKind = singleCountAggregationKind; + this.countColumnNum = countColumnNum; + } + + public SingleCountAggregationKind getSingleCountAggregationKind() { + return singleCountAggregationKind; + } + + public int getCountColumnNum() { + return countColumnNum; + } + } + + //--------------------------------------------------------------------------- + + private boolean isVectorizationGroupByNativeEnabled; + private String engine; + + // Temporary restrictions... + private boolean isOneLongKey; + + private List vectorizationIssueList; + + private AggregationVariation aggregationVariation; + private SingleCountAggregation singleCountAggregation; + + private HashTableKeyType hashTableKeyType; + + private int testGroupByMaxMemoryAvailable; + + public VectorGroupByInfo() { + isVectorizationGroupByNativeEnabled = false; + + isOneLongKey = false; + + vectorizationIssueList = null; + + hashTableKeyType = HashTableKeyType.NONE; + + testGroupByMaxMemoryAvailable = -1; + } + + public boolean getIsVectorizationGroupByNativeEnabled() { + return isVectorizationGroupByNativeEnabled; + } + + public void setIsVectorizationGroupByNativeEnabled(boolean isVectorizationGroupByNativeEnabled) { + this.isVectorizationGroupByNativeEnabled = isVectorizationGroupByNativeEnabled; + } + + public String getEngine() { + return engine; + } + + public void setEngine(String engine) { + this.engine = engine; + } + + public boolean getIsOneLongKey() { + return isOneLongKey; + } + + public void setIsOneLongKey(boolean oneLongKey) { + this.isOneLongKey = oneLongKey; + } + + public List getVectorizationIssueList() { + return vectorizationIssueList; + } + + public void setVectorizationIssueList(List vectorizationIssueList) { + this.vectorizationIssueList = vectorizationIssueList; + } + + public void setAggregationVariation(AggregationVariation aggregationVariation) { + this.aggregationVariation = aggregationVariation; + } + + public AggregationVariation getAggregationVariation() { + return aggregationVariation; + } + + public void setSingleCountAggregation(SingleCountAggregation singleCountAggregation) { + this.singleCountAggregation = singleCountAggregation; + } + + public SingleCountAggregation getSingleCountAggregation() { + return singleCountAggregation; + } + + public HashTableKeyType getHashTableKeyType() { + return hashTableKeyType; + } + + public void setHashTableKeyType(HashTableKeyType hashTableKeyType) { + this.hashTableKeyType = hashTableKeyType; + } + + public int getTestGroupByMaxMemoryAvailable() { + return testGroupByMaxMemoryAvailable; + } + + public void setTestGroupByMaxMemoryAvailable(int 
testGroupByMaxMemoryAvailable) { + this.testGroupByMaxMemoryAvailable = testGroupByMaxMemoryAvailable; + } +} diff --git ql/src/test/queries/clientpositive/vector_count_simple.q ql/src/test/queries/clientpositive/vector_count_simple.q new file mode 100644 index 0000000..2fd77e5 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_count_simple.q @@ -0,0 +1,58 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.groupby.native.enabled=true; + +-- SORT_QUERY_RESULTS + +create table abcd_txt (a int, b int, c int, d int); +LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd_txt; + +create table abcd stored as orc as select * from abcd_txt; + +-- One of each SingleCountAggregation variation. +select * from abcd; +set hive.map.aggr=true; +explain vectorization expression +select a, count(a) from abcd group by a; +select a, count(a) from abcd group by a; + +explain vectorization expression +select a, count(b) from abcd group by a; +select a, count(b) from abcd group by a; + +explain vectorization expression +select a, count(*) from abcd group by a; +select a, count(*) from abcd group by a; + + +CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k; + +set hive.test.vectorized.groupby.native.max.memory.available=1024; + +explain vectorization expression +select i, count(i) from over10k group by i; +select i, count(i) from over10k group by i; + +explain vectorization expression +select i, count(b) from over10k group by i; +select i, count(b) from over10k group by i; + +explain vectorization expression +select i, count(*) from over10k group by i; +select i, count(*) from over10k group by i; \ No newline at end of file diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 89b7169..e6e2177 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1497,6 +1497,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -1594,6 +1596,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -1608,6 +1612,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate 
Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2284,6 +2290,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -2381,6 +2389,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2395,6 +2405,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_partitioned.q.out ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index 799062e..c0fd24d 100644 --- ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1721,10 +1721,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 10:tinyint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: tinyint) @@ -1748,7 +1749,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -2106,6 +2107,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index 2c13d5d..e9519c2 100644 --- ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ 
ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -73,6 +73,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -252,6 +254,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index 1e090f0..f51ab30 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -286,6 +288,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -426,6 +430,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index c99ac8d..fbc46cf 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -85,6 +85,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 54216fa..7a6eaf1 100644 --- 
ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -270,6 +270,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 6093beb..dc9f5ac 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -166,6 +166,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -370,6 +372,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -764,6 +768,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1111,11 +1117,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1140,7 +1146,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1249,11 +1255,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single 
COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1278,7 +1284,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1387,11 +1393,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1416,7 +1422,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -1525,11 +1531,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1554,7 +1560,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index aabfc73..8a56b79 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -175,6 +175,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -370,6 +372,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:binary native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bin (type: binary) diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index 
861ae9a..c7c0681 100644 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -149,6 +149,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out index 9a43659..888d2ae 100644 --- ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -110,6 +110,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) @@ -306,6 +308,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index e8bb722..8943c6e 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -78,6 +78,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -303,6 +305,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index f2277c1..f31862b 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -940,6 +940,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode 
HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1162,6 +1164,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: str (type: string) @@ -1305,6 +1309,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index 90086ea..bd0fc55 100644 --- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1265,10 +1265,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 16:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ws_order_number (type: int) @@ -1292,7 +1293,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1323,6 +1324,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_count_simple.q.out ql/src/test/results/clientpositive/llap/vector_count_simple.q.out new file mode 100644 index 0000000..fecb385 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_count_simple.q.out @@ -0,0 +1,1588 @@ +PREHOOK: query: create table abcd_txt (a int, b int, c int, d int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@abcd_txt +POSTHOOK: query: create table abcd_txt (a int, b int, c int, d int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@abcd_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@abcd_txt 
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@abcd_txt +PREHOOK: query: create table abcd stored as orc as select * from abcd_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@abcd_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@abcd +POSTHOOK: query: create table abcd stored as orc as select * from abcd_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@abcd_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@abcd +POSTHOOK: Lineage: abcd.a SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: abcd.b SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: abcd.c SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:c, type:int, comment:null), ] +POSTHOOK: Lineage: abcd.d SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:d, type:int, comment:null), ] +PREHOOK: query: select * from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select * from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 100 45 4 +10 100 NULL 5 +10 1000 50 1 +100 100 10 3 +12 100 75 7 +12 NULL 80 2 +NULL 35 23 6 +PREHOOK: query: explain vectorization expression +select a, count(a) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select a, count(a) from abcd group by a +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: a (type: int) + outputColumnNames: a + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(a) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountKeyOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: a (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 28 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(a) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(a) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 3 +100 1 +12 2 +NULL 0 +PREHOOK: query: explain vectorization expression +select a, count(b) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select a, count(b) from abcd group by a +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: a, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountColumnOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + vectorProcessingMode: HASH + 
projectedOutputColumnNums: [0] + keys: a (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(b) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(b) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 3 +100 1 +12 1 +NULL 1 +PREHOOK: query: explain vectorization expression +select a, count(*) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select a, count(*) from abcd group by a +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: a (type: int) + 
outputColumnNames: a + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeyCountStarOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: a (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(*) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(*) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 3 +100 1 +12 2 +NULL 1 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + 
`dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization expression +select i, count(i) from over10k group by i +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select i, count(i) from over10k group by i +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(i) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountKeyOperator + groupByMode: HASH + keyExpressions: col 2:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(i) from over10k group by i +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(i) from over10k group by i +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +65546 34 +65547 47 +65548 44 +65549 34 +65550 36 +65551 32 +65552 36 +65553 45 +65554 44 +65555 40 +65556 40 +65557 35 +65558 41 +65559 28 +65560 44 +65561 24 +65562 50 +65563 41 +65564 44 +65565 31 +65566 46 +65567 31 +65568 48 +65569 44 +65570 42 +65571 39 +65572 38 +65573 36 +65574 31 +65575 44 +65576 36 +65577 44 +65578 47 +65579 30 +65580 42 +65581 46 +65582 37 +65583 40 +65584 31 +65585 38 +65586 35 +65587 40 +65588 37 +65589 43 +65590 42 +65591 37 +65592 39 +65593 38 +65594 41 +65595 53 +65596 33 +65597 26 +65598 31 +65599 34 +65600 49 +65601 37 +65602 35 +65603 41 +65604 45 +65605 38 +65606 46 +65607 40 +65608 26 +65609 51 +65610 30 +65611 40 +65612 35 +65613 28 +65614 32 +65615 34 +65616 34 +65617 44 +65618 45 +65619 34 +65620 44 +65621 37 +65622 37 +65623 41 +65624 53 +65625 34 +65626 39 +65627 49 +65628 29 +65629 41 +65630 30 +65631 30 +65632 33 +65633 34 +65634 32 +65635 39 +65636 25 +65637 38 +65638 32 +65639 35 +65640 36 +65641 43 +65642 32 +65643 35 +65644 48 +65645 38 +65646 37 +65647 39 +65648 44 +65649 40 +65650 47 +65651 37 +65652 33 +65653 30 +65654 39 +65655 44 +65656 37 +65657 41 +65658 53 +65659 38 +65660 35 +65661 40 +65662 36 +65663 36 +65664 42 +65665 37 +65666 39 +65667 34 +65668 25 +65669 58 +65670 39 +65671 42 +65672 39 +65673 41 +65674 41 +65675 44 +65676 42 +65677 44 +65678 31 +65679 31 +65680 36 +65681 39 +65682 43 +65683 43 +65684 46 +65685 36 +65686 40 +65687 35 +65688 38 +65689 35 +65690 35 +65691 38 +65692 41 +65693 33 +65694 38 +65695 45 +65696 42 +65697 35 +65698 48 +65699 45 +65700 40 +65701 41 +65702 46 +65703 47 +65704 37 +65705 38 +65706 36 +65707 39 +65708 36 +65709 38 +65710 43 +65711 38 +65712 39 +65713 41 +65714 34 +65715 44 +65716 36 +65717 47 +65718 39 +65719 44 +65720 34 +65721 49 
+65722 33 +65723 34 +65724 39 +65725 43 +65726 50 +65727 48 +65728 43 +65729 36 +65730 30 +65731 31 +65732 46 +65733 37 +65734 45 +65735 49 +65736 27 +65737 55 +65738 45 +65739 42 +65740 39 +65741 26 +65742 38 +65743 39 +65744 42 +65745 44 +65746 40 +65747 45 +65748 42 +65749 36 +65750 30 +65751 46 +65752 48 +65753 36 +65754 33 +65755 44 +65756 50 +65757 37 +65758 43 +65759 47 +65760 44 +65761 28 +65762 43 +65763 30 +65764 46 +65765 28 +65766 44 +65767 38 +65768 32 +65769 43 +65770 44 +65771 51 +65772 36 +65773 49 +65774 45 +65775 42 +65776 44 +65777 37 +65778 43 +65779 47 +65780 41 +65781 31 +65782 40 +65783 43 +65784 39 +65785 41 +65786 34 +65787 37 +65788 44 +65789 41 +65790 32 +65791 32 +PREHOOK: query: explain vectorization expression +select i, count(b) from over10k group by i +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select i, count(b) from over10k group by i +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountColumnOperator + groupByMode: HASH + keyExpressions: col 2:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(b) from over10k group by i +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(b) from over10k group by i +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +65546 34 +65547 47 +65548 44 +65549 34 +65550 36 +65551 32 +65552 36 +65553 45 +65554 44 +65555 40 +65556 40 +65557 35 +65558 41 +65559 28 +65560 44 +65561 24 +65562 50 +65563 41 +65564 44 +65565 31 +65566 46 +65567 31 +65568 48 +65569 44 +65570 42 +65571 39 +65572 38 +65573 36 +65574 31 +65575 44 +65576 36 +65577 44 +65578 47 +65579 30 +65580 42 +65581 46 +65582 37 +65583 40 +65584 31 +65585 38 +65586 35 +65587 40 +65588 37 +65589 43 +65590 42 +65591 37 +65592 39 +65593 38 +65594 41 +65595 53 +65596 33 +65597 26 +65598 31 +65599 34 +65600 49 +65601 37 +65602 35 +65603 41 +65604 45 +65605 38 +65606 46 +65607 40 +65608 26 +65609 51 +65610 30 +65611 40 +65612 35 +65613 28 +65614 32 +65615 34 +65616 34 +65617 44 +65618 45 +65619 34 +65620 44 +65621 37 +65622 37 +65623 41 +65624 53 +65625 34 +65626 39 +65627 49 +65628 29 +65629 41 +65630 30 +65631 30 +65632 33 +65633 34 +65634 32 +65635 39 +65636 25 +65637 38 +65638 32 +65639 35 +65640 36 +65641 43 +65642 32 +65643 35 +65644 48 +65645 38 +65646 37 +65647 39 +65648 44 +65649 40 +65650 47 +65651 37 +65652 33 +65653 30 +65654 39 +65655 44 +65656 37 +65657 41 +65658 53 +65659 38 +65660 35 +65661 40 +65662 36 +65663 36 +65664 42 +65665 37 +65666 39 +65667 34 +65668 25 +65669 58 +65670 39 +65671 42 +65672 39 +65673 41 +65674 41 +65675 44 +65676 42 +65677 44 +65678 31 +65679 31 +65680 36 +65681 39 +65682 43 +65683 43 +65684 46 +65685 36 +65686 40 +65687 35 +65688 38 +65689 35 +65690 35 +65691 38 +65692 41 +65693 33 +65694 38 +65695 45 +65696 42 +65697 35 +65698 48 +65699 45 +65700 40 +65701 41 +65702 46 +65703 47 +65704 37 +65705 38 +65706 36 +65707 39 +65708 36 +65709 38 +65710 43 +65711 38 +65712 39 +65713 41 +65714 34 +65715 44 +65716 36 +65717 47 +65718 39 +65719 44 +65720 34 +65721 49 +65722 33 +65723 34 +65724 39 +65725 43 +65726 50 +65727 48 +65728 43 +65729 36 +65730 30 +65731 31 +65732 46 +65733 37 +65734 45 +65735 49 +65736 27 +65737 55 +65738 45 +65739 42 +65740 39 +65741 26 +65742 38 +65743 39 
+65744 42 +65745 44 +65746 40 +65747 45 +65748 42 +65749 36 +65750 30 +65751 46 +65752 48 +65753 36 +65754 33 +65755 44 +65756 50 +65757 37 +65758 43 +65759 47 +65760 44 +65761 28 +65762 43 +65763 30 +65764 46 +65765 28 +65766 44 +65767 38 +65768 32 +65769 43 +65770 44 +65771 51 +65772 36 +65773 49 +65774 45 +65775 42 +65776 44 +65777 37 +65778 43 +65779 47 +65780 41 +65781 31 +65782 40 +65783 43 +65784 39 +65785 41 +65786 34 +65787 37 +65788 44 +65789 41 +65790 32 +65791 32 +PREHOOK: query: explain vectorization expression +select i, count(*) from over10k group by i +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select i, count(*) from over10k group by i +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeyCountStarOperator + groupByMode: HASH + keyExpressions: col 2:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group 
By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(*) from over10k group by i +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(*) from over10k group by i +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +65546 34 +65547 47 +65548 44 +65549 34 +65550 36 +65551 32 +65552 36 +65553 45 +65554 44 +65555 40 +65556 40 +65557 35 +65558 41 +65559 28 +65560 44 +65561 24 +65562 50 +65563 41 +65564 44 +65565 31 +65566 46 +65567 31 +65568 48 +65569 44 +65570 42 +65571 39 +65572 38 +65573 36 +65574 31 +65575 44 +65576 36 +65577 44 +65578 47 +65579 30 +65580 42 +65581 46 +65582 37 +65583 40 +65584 31 +65585 38 +65586 35 +65587 40 +65588 37 +65589 43 +65590 42 +65591 37 +65592 39 +65593 38 +65594 41 +65595 53 +65596 33 +65597 26 +65598 31 +65599 34 +65600 49 +65601 37 +65602 35 +65603 41 +65604 45 +65605 38 +65606 46 +65607 40 +65608 26 +65609 51 +65610 30 +65611 40 +65612 35 +65613 28 +65614 32 +65615 34 +65616 34 +65617 44 +65618 45 +65619 34 +65620 44 +65621 37 +65622 37 +65623 41 +65624 53 +65625 34 +65626 39 +65627 49 +65628 29 +65629 41 +65630 30 +65631 30 +65632 33 +65633 34 +65634 32 +65635 39 +65636 25 +65637 38 +65638 32 +65639 35 +65640 36 +65641 43 +65642 32 +65643 35 +65644 48 +65645 38 +65646 37 +65647 39 +65648 44 +65649 40 +65650 47 +65651 37 +65652 33 +65653 30 +65654 39 +65655 44 +65656 37 +65657 41 +65658 53 +65659 38 +65660 35 +65661 40 +65662 36 +65663 36 +65664 42 +65665 37 +65666 39 +65667 34 +65668 25 +65669 58 +65670 39 +65671 42 +65672 39 +65673 41 +65674 41 +65675 44 +65676 42 +65677 44 +65678 31 +65679 31 +65680 36 +65681 39 +65682 43 +65683 43 +65684 46 +65685 36 +65686 40 +65687 35 +65688 38 +65689 35 +65690 35 +65691 38 +65692 41 +65693 33 +65694 38 +65695 45 +65696 42 +65697 35 +65698 48 +65699 45 +65700 40 +65701 41 +65702 46 +65703 47 +65704 37 +65705 38 +65706 36 +65707 39 +65708 36 +65709 38 +65710 43 +65711 38 +65712 39 +65713 41 +65714 34 +65715 44 +65716 36 +65717 47 +65718 39 +65719 44 +65720 34 +65721 49 +65722 33 +65723 34 +65724 39 +65725 43 +65726 50 +65727 48 +65728 43 +65729 36 +65730 30 +65731 31 +65732 46 +65733 37 +65734 45 +65735 49 +65736 27 +65737 55 +65738 45 +65739 42 +65740 39 +65741 26 +65742 38 +65743 39 +65744 42 +65745 44 +65746 40 +65747 45 +65748 42 +65749 36 +65750 30 +65751 46 +65752 48 +65753 36 +65754 33 +65755 44 +65756 50 +65757 37 +65758 43 +65759 47 +65760 44 +65761 28 +65762 43 +65763 30 +65764 46 +65765 28 +65766 44 +65767 38 +65768 32 +65769 
43 +65770 44 +65771 51 +65772 36 +65773 49 +65774 45 +65775 42 +65776 44 +65777 37 +65778 43 +65779 47 +65780 41 +65781 31 +65782 40 +65783 43 +65784 39 +65785 41 +65786 34 +65787 37 +65788 44 +65789 41 +65790 32 +65791 32 diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out index 961261d..2880469 100644 --- ql/src/test/results/clientpositive/llap/vector_data_types.q.out +++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out @@ -385,6 +385,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index 902d137..0661cc1 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -88,6 +88,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -268,6 +270,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -482,6 +486,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -682,6 +688,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 50e4305..ad5f0b3 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -592,6 +592,8 @@ STAGE PLANS: className: 
VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -1211,6 +1213,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index c6867f8..5d0582b 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -2304,6 +2304,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -3245,6 +3247,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3409,6 +3413,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3655,6 +3661,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3786,6 +3794,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3917,6 +3927,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -6300,6 +6312,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -7247,6 +7261,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -7412,6 +7428,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -7659,6 +7677,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7791,6 +7811,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7923,6 +7945,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out index 73d04a9..c8e1da7 100644 --- ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out @@ -144,6 +144,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: 
HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out index 127d8ad..8a5a7d4 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out @@ -146,6 +146,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 3bfbda0..b792de5 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -60,6 +60,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -198,6 +200,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -362,6 +366,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -623,6 +629,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -978,6 +986,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -1013,6 +1023,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index 9a2f5d8..63a0bff 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -231,6 +233,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -390,6 +394,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -543,6 +549,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -696,6 +704,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 
3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -856,6 +866,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out index 6005fb2..45fb70b 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -75,6 +75,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -277,6 +279,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -489,6 +493,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -603,13 +609,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: _col2 
(type: bigint) mode: hash outputColumnNames: _col0, _col1 @@ -796,6 +803,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -910,13 +919,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1 @@ -1099,6 +1109,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1427,6 +1439,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1748,6 +1762,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1914,6 +1930,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1987,13 +2005,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators:
VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1 @@ -2132,6 +2151,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index d8e6b3f..34ab1a9 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -82,6 +82,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -258,6 +260,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out index 3586eae..a5a2631 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -90,6 +90,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -252,6 +254,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0)
-> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -414,6 +418,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -576,6 +582,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -732,6 +740,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -888,6 +898,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -1029,6 +1041,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: double) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index b072ffc..5a940b6 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + 
nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string) @@ -257,6 +259,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string) @@ -661,6 +665,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out index 74caa3f..7798516 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out @@ -83,6 +83,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -222,6 +224,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -387,6 +391,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out index b896193..4efc7c0 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -84,6 +84,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, 
ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -328,6 +330,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -603,6 +607,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out index 8da5735..99bf9d4 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -146,6 +148,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -268,6 +272,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -338,6 +344,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -487,6 +495,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -557,6 +567,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out index 6c4ae65..ca3535d 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -219,6 +221,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out index 80e073b..79e31cc 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: 
bigint) @@ -237,6 +239,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -405,6 +409,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -574,6 +580,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -780,6 +788,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -941,6 +951,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1109,6 +1121,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1271,6 +1285,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + 
nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1478,6 +1494,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1634,6 +1652,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1792,6 +1812,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1941,6 +1963,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2107,6 +2131,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2273,6 +2299,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2434,6 +2462,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key 
IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out index e67bca7..5aa95cb 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -277,6 +279,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -478,6 +482,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -677,6 +683,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -872,6 +880,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -1056,6 +1066,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: double) 
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index dc3363d..ea48553 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -74,6 +74,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: category (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index 98e6e54..41274db 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -89,6 +89,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -108,6 +110,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index d90ebf0..569f04b 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -268,10 +268,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -296,7 +297,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -467,10 +468,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: 
VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -494,7 +496,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -763,6 +765,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_item_sk (type: int) @@ -829,6 +833,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: 1 (type: int) @@ -987,6 +993,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index ef49d90..5c97051 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -327,6 +329,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -673,6 +677,8 @@ STAGE 
PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -708,6 +714,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out index 79ca6d9..c8cb1f5 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out @@ -77,6 +77,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -498,6 +500,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -569,6 +573,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -679,10 +685,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: 1 (type: int) @@ -709,7 +716,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -760,6 +767,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out index 6c6986e..981b724 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out @@ -86,6 +86,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index ec3e2b8..27a7819 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -170,6 +170,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: s_store_id (type: string), 0L (type: bigint) @@ -301,6 +303,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index bb555df..30089da 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -313,10 +313,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true 
vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -342,7 +343,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 5fb8258..4e9535b 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -128,6 +128,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -278,6 +280,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -499,6 +503,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -710,6 +716,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 17704e5..9d58312 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -3389,9 +3389,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3405,7 +3406,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3502,9 +3503,10 @@ STAGE PLANS: 
className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3518,7 +3520,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3617,9 +3619,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3633,7 +3636,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3730,6 +3733,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3848,6 +3853,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3928,9 +3935,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3944,7 +3952,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -4044,6 +4052,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4151,9 +4161,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4167,7 +4178,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -4290,9 +4301,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4306,7 +4318,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4416,9 +4428,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4432,7 +4445,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4552,9 +4565,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4568,7 +4582,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true 
usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4678,6 +4692,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator @@ -4796,9 +4812,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4812,7 +4829,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -4826,9 +4843,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4842,7 +4860,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4962,9 +4980,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4978,7 +4997,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5105,9 +5124,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: 
className: VectorReduceSinkLongOperator @@ -5121,7 +5141,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5233,9 +5253,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5249,7 +5270,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -5380,9 +5401,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5396,7 +5418,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -5529,9 +5551,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5545,7 +5568,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -5705,9 +5728,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5721,7 +5745,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true 
usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -5872,6 +5896,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkStringOperator @@ -6009,10 +6035,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6038,7 +6065,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6211,10 +6238,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6240,7 +6268,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6415,10 +6443,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6444,7 +6473,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6623,6 +6652,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 0:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + 
nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) @@ -6830,6 +6861,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -6966,10 +6999,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6995,7 +7029,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7177,6 +7211,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -7377,10 +7413,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -7406,7 +7443,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7637,10 +7674,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) 
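Note on the plan changes above: the updated golden files add two EXPLAIN fields, nativeConditionsMet and nativeConditionsNotMet, and the group-by operator only reports native: true when every listed condition holds (the enable flag, a tez/spark execution engine, a single long-family key, a single COUNT aggregation or pure duplicate reduction, and HASH group-by mode). The sketch below only illustrates how such met/not-met lists could be assembled from those five checks; the method names, parameter shapes, and the exact set of "long family" key types are assumptions, and this is not the Vectorizer code from this patch.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Illustrative check mirroring the nativeConditionsMet / nativeConditionsNotMet
 * strings in the EXPLAIN output above. All names here are hypothetical; the real
 * decision is made inside Hive's Vectorizer and is not shown in this diff.
 */
public class NativeGroupByCheckSketch {

  static boolean isLongFamily(String typeName) {
    // "One Long Key": assumed to mean a single key held in a long-backed column vector.
    return Arrays.asList("tinyint", "smallint", "int", "bigint", "boolean", "date")
        .contains(typeName);
  }

  /** Returns true when all conditions hold; fills the two EXPLAIN-style lists. */
  static boolean checkNative(boolean nativeEnabled, String engine, List<String> keyTypes,
      boolean singleCountOrDuplicateReduction, String groupByMode,
      List<String> met, List<String> notMet) {
    add(met, notMet, nativeEnabled,
        "hive.vectorized.execution.groupby.native.enabled IS " + nativeEnabled);
    boolean engineOk = engine.equals("tez") || engine.equals("spark");
    add(met, notMet, engineOk,
        "hive.execution.engine " + engine + " IN [tez, spark] IS " + engineOk);
    boolean oneLongKey = keyTypes.size() == 1 && isLongFamily(keyTypes.get(0));
    add(met, notMet, oneLongKey, "One Long Key IS " + oneLongKey);
    add(met, notMet, singleCountOrDuplicateReduction,
        "Single COUNT aggregation or Duplicate Reduction IS " + singleCountOrDuplicateReduction);
    boolean hashMode = groupByMode.equals("HASH");
    add(met, notMet, hashMode, "Group By Mode HASH IS " + hashMode);
    return notMet.isEmpty();
  }

  private static void add(List<String> met, List<String> notMet, boolean ok, String desc) {
    (ok ? met : notMet).add(desc);
  }

  public static void main(String[] args) {
    List<String> met = new ArrayList<>(), notMet = new ArrayList<>();
    boolean isNative = checkNative(true, "tez", Arrays.asList("int"), true, "HASH", met, notMet);
    System.out.println("native: " + isNative);           // native: true
    System.out.println("nativeConditionsMet: " + met);   // all five conditions
    System.out.println("nativeConditionsNotMet: " + notMet); // empty
  }
}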
@@ -7666,7 +7704,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7852,10 +7890,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -7882,7 +7921,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8109,10 +8148,11 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8138,7 +8178,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8325,6 +8365,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -8534,10 +8576,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8563,7 +8606,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8597,10 +8640,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column 
stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8626,7 +8670,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8834,10 +8878,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8863,7 +8908,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9064,10 +9109,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9093,7 +9139,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9278,10 +9324,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9307,7 +9354,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9528,10 +9575,11 @@ STAGE 
PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9557,7 +9605,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9780,10 +9828,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9809,7 +9858,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10090,10 +10139,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10119,7 +10169,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10370,6 +10420,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -10529,10 +10581,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10558,7 +10611,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10732,10 +10785,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10761,7 +10815,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10937,10 +10991,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10966,7 +11021,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11146,6 +11201,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 0:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) @@ -11354,6 +11411,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -11490,10 +11549,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 
0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -11519,7 +11579,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11702,6 +11762,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -11903,10 +11965,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -11932,7 +11995,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12165,10 +12228,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -12194,7 +12258,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12381,10 +12445,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: 
int) @@ -12411,7 +12476,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12638,10 +12703,11 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -12667,7 +12733,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12855,6 +12921,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -13064,10 +13132,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13093,7 +13162,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13127,10 +13196,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13156,7 +13226,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13364,10 +13434,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic 
stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13393,7 +13464,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13594,10 +13665,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13623,7 +13695,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13808,10 +13880,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13837,7 +13910,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14058,10 +14131,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -14087,7 +14161,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true 
rowBatchContext: @@ -14310,10 +14384,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -14339,7 +14414,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14622,10 +14697,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -14651,7 +14727,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14903,6 +14979,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -15062,10 +15140,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -15091,7 +15170,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15265,10 +15344,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -15294,7 +15374,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15470,10 +15550,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -15499,7 +15580,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15679,6 +15760,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 0:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) @@ -15887,6 +15970,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -16023,10 +16108,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16052,7 +16138,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16235,6 +16321,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -16436,10 +16524,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16465,7 +16554,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16698,10 +16787,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16727,7 +16817,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16914,10 +17004,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16944,7 +17035,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17171,10 +17262,11 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true 
vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17200,7 +17292,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17388,6 +17480,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -17597,10 +17691,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17626,7 +17721,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17660,10 +17755,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17689,7 +17785,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17897,10 +17993,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17926,7 +18023,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ 
-18127,10 +18224,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18156,7 +18254,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18341,10 +18439,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18370,7 +18469,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18591,10 +18690,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18620,7 +18720,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18843,10 +18943,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18872,7 +18973,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: 
false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -19155,10 +19256,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -19184,7 +19286,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -19436,6 +19538,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 37821fb..76bf948 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -42,10 +42,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -70,7 +71,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -141,10 +142,11 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -169,7 +171,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: 
                org.apache.hadoop.mapred.TextInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2 
@@ -314,10 +316,11 @@ STAGE PLANS:
                  Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    Group By Vectorization:
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyDuplicateReductionOperator
                        groupByMode: HASH
                        keyExpressions: col 1:int
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: []
                    keys: l_partkey (type: int)
@@ -342,7 +345,7 @@ STAGE PLANS:
                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Map 3 
@@ -419,6 +422,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 0:int, col 17:int
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: []
                    keys: _col0 (type: int), _col1 (type: int)
diff --git ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out
index 7a2cd54..6741948 100644
--- ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out
+++ ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out
@@ -146,6 +146,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -272,6 +274,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
index 0b645ab..557c06b 100644
--- ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
+++ ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
@@ -1069,6 +1069,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 9:double
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: _col0 (type: double)
@@ -1754,6 +1756,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 9:int, col 12:boolean
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: []
                    keys: _col0 (type: int), _col1 (type: boolean)
@@ -2202,11 +2206,11 @@ STAGE PLANS:
                  Group By Operator
                    aggregations: count(_col1)
                    Group By Vectorization:
-                       aggregators: VectorUDAFCount(col 11:int) -> bigint
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyCountColumnOperator
                        groupByMode: HASH
                        keyExpressions: col 10:int
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: _col0 (type: int)
@@ -2231,7 +2235,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2 
@@ -2341,11 +2345,11 @@ STAGE PLANS:
                  Group By Operator
                    aggregations: count(_col1)
                    Group By Vectorization:
-                       aggregators: VectorUDAFCount(col 10:int) -> bigint
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyCountColumnOperator
                        groupByMode: HASH
                        keyExpressions: col 9:int
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: _col0 (type: int)
@@ -2370,7 +2374,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2 
@@ -2480,11 +2484,11 @@ STAGE PLANS:
                  Group By Operator
                    aggregations: count(_col1)
                    Group By Vectorization:
-                       aggregators: VectorUDAFCount(col 12:int) -> bigint
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyCountColumnOperator
                        groupByMode: HASH
                        keyExpressions: col 11:int
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: _col0 (type: int)
@@ -2509,7 +2513,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2 
@@ -2720,11 +2724,11 @@ STAGE PLANS:
                  Group By Operator
                    aggregations: count(_col1)
                    Group By Vectorization:
-                       aggregators: VectorUDAFCount(col 13:int) -> bigint
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyCountColumnOperator
                        groupByMode: HASH
                        keyExpressions: col 12:int
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: _col0 (type: int)
@@ -2749,7 +2753,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2 
diff --git ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out
index e4bc4f0..76822c1 100644
--- ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out
+++ ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out
@@ -147,6 +147,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 7:boolean
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: bo (type: boolean)
diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
index 4901e83..a9758f9 100644
--- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
@@ -745,6 +745,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out
index a841d4c..00d7142 100644
--- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out
@@ -320,6 +320,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out
index b1209d9..7e513e4 100644
--- ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out
+++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out
@@ -287,6 +287,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -506,6 +508,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2)
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
@@ -1587,6 +1591,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -1806,6 +1812,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2)
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: c1 (type: decimal(7,2)), c2 (type: decimal(7,2))
diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out
index 068453f..870ea9b 100644
--- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out
+++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out
@@ -116,6 +116,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -286,6 +288,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -456,6 +460,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -613,6 +619,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -770,6 +778,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 687b4af..c85f398 100644
--- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -449,11 +449,11 @@ STAGE PLANS:
                  Group By Operator
                    aggregations: count()
                    Group By Vectorization:
-                       aggregators: VectorUDAFCountStar(*) -> bigint
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyCountStarOperator
                        groupByMode: HASH
                        keyExpressions: col 2:date
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: fl_date (type: date)
@@ -478,7 +478,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2 
@@ -1380,11 +1380,11 @@ STAGE PLANS:
                  Group By Operator
                    aggregations: count()
                    Group By Vectorization:
-                       aggregators: VectorUDAFCountStar(*) -> bigint
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyCountStarOperator
                        groupByMode: HASH
                        keyExpressions: col 5:date
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: fl_date (type: date)
@@ -1409,7 +1409,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2 
@@ -2340,6 +2340,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 5:timestamp
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: fl_time (type: timestamp)
@@ -2874,11 +2876,11 @@ STAGE PLANS:
                  Group By Operator
                    aggregations: count()
                    Group By Vectorization:
-                       aggregators: VectorUDAFCountStar(*) -> bigint
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyCountStarOperator
                        groupByMode: HASH
                        keyExpressions: col 2:date
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: fl_date (type: date)
@@ -2903,7 +2905,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2 
@@ -3805,11 +3807,11 @@ STAGE PLANS:
                  Group By Operator
                    aggregations: count()
                    Group By Vectorization:
-                       aggregators: VectorUDAFCountStar(*) -> bigint
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyCountStarOperator
                        groupByMode: HASH
                        keyExpressions: col 5:date
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: fl_date (type: date)
@@ -3834,7 +3836,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2 
@@ -4765,6 +4767,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 5:timestamp
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: fl_time (type: timestamp)
diff --git ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out
index 568549d..8f8bbad 100644
--- ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out
@@ -86,6 +86,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 0:string, col 1:int
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1]
                    keys: name (type: string), age (type: int)
diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
index 3f92327..ae1a083 100644
--- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
@@ -67,6 +67,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14)
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
index bd42ed2..7959b24 100644
--- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
@@ -104,6 +104,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 0:int, col 1:int
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: []
                    keys: _col0 (type: int), _col1 (type: int)
diff --git ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
index 8fb0752..9dbb3f1 100644
--- ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
@@ -119,6 +119,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                    mode: hash
@@ -326,6 +328,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
index 5b43765..388522e 100644
--- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
+++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
@@ -358,6 +358,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 20:string
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: []
                    keys: _col0 (type: string)
diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
index 7be4d7d..9a07f6c 100644
--- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
@@ -491,7 +491,7 @@ STAGE PLANS:
                  Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    Group By Vectorization:
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyDuplicateReductionOperator
                        groupByMode: HASH
                        keyExpressions: col 0:tinyint
                        native: false
@@ -522,7 +522,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
                rowBatchContext:
diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index aec161d..5634385 100644
--- ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -306,6 +306,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1]
                    mode: hash
@@ -450,6 +452,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
index 8351192..a45ae35 100644
--- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
@@ -1262,6 +1262,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1, 2, 3]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
index f19e2ca..465ea89 100644
--- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
@@ -73,6 +73,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1]
                    mode: hash
@@ -200,10 +202,11 @@ STAGE PLANS:
                  Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    Group By Vectorization:
-                       className: VectorGroupByOperator
+                       className: VectorGroupByHashLongKeyDuplicateReductionOperator
                        groupByMode: HASH
                        keyExpressions: col 2:int
-                       native: false
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: []
                    keys: cint (type: int)
@@ -229,7 +232,7 @@ STAGE PLANS:
                inputFormatFeatureSupport: []
                featureSupportInUse: []
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: false
+               allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
                rowBatchContext:
@@ -282,6 +285,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1, 2, 3]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
index 8abd234..5cee8fc 100644
--- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
@@ -75,6 +75,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1, 2, 3, 4]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
index f05e5c0..ca93389 100644
--- ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
@@ -206,6 +206,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -409,6 +411,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -612,6 +616,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
index acb9126..9aa084d 100644
--- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
@@ -298,6 +298,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 1:tinyint
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One Long Key IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                    keys: _col0 (type: tinyint)
diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index ccf9aae..165a1d6 100644
--- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -3542,6 +3542,8 @@ STAGE PLANS:
                        groupByMode: HASH
                        keyExpressions: col 2:string, col 3:string
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    keys: p_mfgr (type: string), p_brand (type: string)
diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index 46a2470..20f437c 100644
--- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -146,6 +146,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1]
                    mode: hash
@@ -371,6 +373,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1]
                    mode: hash
@@ -512,6 +516,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1, 2]
                    mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index c9dd434..cc391d0 100644
--- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -806,6 +806,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1, 2, 3]
                    mode: hash
@@ -933,6 +935,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0]
                    mode: hash
@@ -1078,6 +1082,8 @@ STAGE PLANS:
                        className: VectorGroupByOperator
                        groupByMode: HASH
                        native: false
+                       nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true
+                       nativeConditionsNotMet: One Long Key IS false, Single COUNT aggregation or Duplicate Reduction IS false
                        vectorProcessingMode: HASH
                        projectedOutputColumnNums: [0, 1, 2, 3]
                    mode: hash