diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 95a4b9d..3e68624 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2448,6 +2448,7 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b
       VectorAggregateExpression aggExpr = ctor.newInstance(
           vectorParams.length > 0 ? vectorParams[0] : null);
       aggExpr.init(desc);
+      LOG.info("Vectorized aggregation class " + aggClass.getName());
       return aggExpr;
     } catch (Exception e) {
       throw new HiveException("Internal exception for vector aggregate : \"" +
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index d75d185..6573871 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -649,12 +649,21 @@ public static String displayBytes(byte[] bytes, int start, int length) {
   public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) {
     StringBuilder sb = new StringBuilder();
     sb.append(prefix + " row " + index + " ");
-    for (int column = 0; column < batch.cols.length; column++) {
+    for (int p = 0; p < batch.projectionSize; p++) {
+      int column = batch.projectedColumns[p];
+      if (p == column) {
+        sb.append("(col " + p + ") ");
+      } else {
+        sb.append("(proj col " + p + " col " + column + ") ");
+      }
       ColumnVector colVector = batch.cols[column];
       if (colVector == null) {
-        sb.append("(null colVector " + column + ")");
+        sb.append("(null ColumnVector)");
       } else {
         boolean isRepeating = colVector.isRepeating;
+        if (isRepeating) {
+          sb.append("(repeating)");
+        }
         index = (isRepeating ? 0 : index);
         if (colVector.noNulls || !colVector.isNull[index]) {
           if (colVector instanceof LongColumnVector) {
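The debugDisplayOneRow() change above iterates batch.projectedColumns instead of the raw cols array and remaps the row index to 0 when a vector is repeating. Below is a minimal standalone sketch of that isRepeating convention, under the assumption that entry 0 stands for every logical row; SimpleLongColumn and readRow are illustrative names, not Hive APIs.

// Standalone sketch (not Hive code) of the repeating-vector read convention.
public class RepeatingColumnSketch {

  static class SimpleLongColumn {
    boolean isRepeating;                     // true => entry 0 stands for every row
    boolean noNulls;                         // true => isNull[] can be ignored
    boolean[] isNull = new boolean[1024];
    long[] vector = new long[1024];
  }

  // Read logical row 'row', applying the same index remap the patch adds.
  static String readRow(SimpleLongColumn col, int row) {
    int index = col.isRepeating ? 0 : row;
    if (!col.noNulls && col.isNull[index]) {
      return "NULL";
    }
    return Long.toString(col.vector[index]);
  }

  public static void main(String[] args) {
    SimpleLongColumn col = new SimpleLongColumn();
    col.isRepeating = true;
    col.noNulls = true;
    col.vector[0] = 42L;
    System.out.println(readRow(col, 0));     // 42
    System.out.println(readRow(col, 511));   // 42: same repeated entry
  }
}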
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java
index 4e43905..cf373a1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java
@@ -97,7 +97,12 @@ public void aggregateInputSelection(
       ColumnVector inputVector = batch.cols[this.inputExpression.getOutputColumn()];
 
-      if (inputVector.noNulls) {
+      if (inputVector.isRepeating) {
+        if (inputVector.noNulls || !inputVector.isNull[0]) {
+          iterateNoNullsWithAggregationSelection(
+            aggregationBufferSets, aggregateIndex, batchSize);
+        }
+      } else if (inputVector.noNulls) {
         // if there are no nulls then the iteration is the same on all cases
         iterateNoNullsWithAggregationSelection(
           aggregationBufferSets, aggregateIndex, batchSize);
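The VectorUDAFCount hunk above adds an up-front branch for repeating inputs: a single null check on entry 0 decides whether the whole batch contributes to COUNT. The sketch below shows that all-or-nothing logic in isolation; SimpleColumn and countBatch are illustrative names, not Hive code, and the per-group aggregation-buffer bookkeeping of the real operator is omitted.

// Standalone sketch (not Hive code) of the repeating fast path for COUNT.
public class RepeatingCountSketch {

  static class SimpleColumn {
    boolean isRepeating;
    boolean noNulls;
    boolean[] isNull = new boolean[1024];
  }

  // COUNT of non-null values in one batch of 'batchSize' rows.
  static long countBatch(SimpleColumn col, int batchSize) {
    if (col.isRepeating) {
      // One value stands for the whole batch: all rows count or none do.
      return (col.noNulls || !col.isNull[0]) ? batchSize : 0;
    }
    if (col.noNulls) {
      return batchSize;                      // no per-row null checks needed
    }
    long count = 0;
    for (int i = 0; i < batchSize; i++) {    // general case: test each row
      if (!col.isNull[i]) {
        count++;
      }
    }
    return count;
  }

  public static void main(String[] args) {
    SimpleColumn col = new SimpleColumn();
    col.isRepeating = true;                  // e.g. a constant column
    col.isNull[0] = false;                   // repeated value is non-null
    System.out.println(countBatch(col, 1024));  // 1024
  }
}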
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
index 2d9da84..1c3ba45 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
 
+import com.google.common.base.Preconditions;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -104,6 +106,38 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException {
   //------------------------------------------------------------------------------------------------
 
+  protected boolean verifyOutputColumnReset(ColumnVector colVector, String title, int column) {
+    if (colVector.isRepeating) {
+      Preconditions.checkState(false, title + " " + column + " isRepeating is true");
+    }
+    if (!colVector.noNulls) {
+      Preconditions.checkState(false, title + " " + column + " noNulls is false");
+    }
+    boolean[] isNull = colVector.isNull;
+    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+      if (isNull[i]) {
+        Preconditions.checkState(false, title + " " + column + " isNull[" + i + "] is true");
+      }
+    }
+    return true;
+  }
+
+  protected boolean verifyOutputColumnsReset(VectorizedRowBatch batch) {
+
+    // For join operators that can generate small table results, verify their
+    // (target) scratch columns.
+
+    for (int column : smallTableOutputVectorColumns) {
+      Preconditions.checkState(verifyOutputColumnReset(batch.cols[column], "small table column", column));
+    }
+
+    for (int column : bigTableOuterKeyOutputVectorColumns) {
+      Preconditions.checkState(verifyOutputColumnReset(batch.cols[column], "big table outer join key", column));
+    }
+
+    return true;
+  }
+
   protected void performValueExpressions(VectorizedRowBatch batch,
       int[] allMatchs, int allMatchCount) {
 
     /*
@@ -166,6 +200,10 @@ protected int generateHashMapResultSingleValue(VectorizedRowBatch batch,
 
       int batchIndex = allMatchs[allMatchesIndex + i];
 
+      // if (batchIndex == 0) {
+      //   Preconditions.checkState(verifyOutputColumnsReset(batch));
+      // }
+
       if (bigTableVectorCopyOuterKeys != null) {
         // Copy within row.
         bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
index 5a88784..f57f6ae 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
 
 import java.io.IOException;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
@@ -33,6 +34,8 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
 
+import com.google.common.base.Preconditions;
+
 /**
  * This class has methods for generating vectorized join results for outer joins.
  *
@@ -566,6 +569,10 @@ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs,
     for (int i = 0; i < noMatchSize; i++) {
       int batchIndex = noMatchs[i];
 
+      // if (batchIndex == 0) {
+      //   Preconditions.checkState(verifyOutputColumnsReset(batch));
+      // }
+
       // Mark any scratch small table scratch columns that would normally receive a copy of the
       // key as null, too.
       for (int column : bigTableOuterKeyOutputVectorColumns) {
@@ -726,6 +733,8 @@ public void finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult jo
    */
   protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException {
 
+    // Preconditions.checkState(verifyOutputColumnsReset(batch));
+
     for (int column : smallTableOutputVectorColumns) {
       ColumnVector colVector = batch.cols[column];
       colVector.noNulls = false;
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index a842649..614dc59 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1386,6 +1386,9 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo
       if (!ret) {
        return false;
       }
+
+      LOG.info("Vectorized Map GROUP BY mode " + desc.getMode().name());
+
     } else {
       // ReduceWork
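The new verifyOutputColumnReset()/verifyOutputColumnsReset() methods assert that a scratch column is still in its freshly reset state (not repeating, noNulls true, no isNull entry set) before join results are written into it, while the outer-null paths deliberately leave columns carrying repeated NULLs. The sketch below contrasts the two states; ScratchColumn, isReset and markAllNull are illustrative names, and markAllNull is an assumed rendering of the repeated-NULL convention (the hunk above only shows colVector.noNulls = false being set).

// Standalone sketch (not Hive code) of reset vs. repeated-NULL column states.
public class ScratchColumnStateSketch {

  static class ScratchColumn {
    boolean isRepeating;
    boolean noNulls = true;
    boolean[] isNull = new boolean[1024];
  }

  // The invariant the verify methods assert: nothing left over from a prior batch.
  static boolean isReset(ScratchColumn col) {
    if (col.isRepeating || !col.noNulls) {
      return false;
    }
    for (boolean n : col.isNull) {
      if (n) {
        return false;
      }
    }
    return true;
  }

  // Assumed repeated-NULL state for a whole-batch outer-join miss:
  // entry 0 is null and stands for every row.
  static void markAllNull(ScratchColumn col) {
    col.noNulls = false;
    col.isNull[0] = true;
    col.isRepeating = true;
  }

  public static void main(String[] args) {
    ScratchColumn col = new ScratchColumn();
    System.out.println(isReset(col));   // true: fresh scratch column
    markAllNull(col);
    System.out.println(isReset(col));   // false: now carries repeated NULLs
  }
}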