diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt
index 87335f1..a4b32c3 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt
@@ -58,6 +58,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt
index 105eb92..b29af8d 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarWithConvert.txt
@@ -59,6 +59,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt
index c6614ab..154e814 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt
@@ -58,6 +58,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt
index 841ef93..2df857c 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt
@@ -62,6 +62,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
index cf690db..bbbcc71 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
@@ -24,6 +24,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 
+import com.google.common.base.Preconditions;
+
 public class <ClassName> extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -43,15 +45,20 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
+    Preconditions.checkState(batch.validateIsNull());
+
     if (childExpressions != null) {
       this.evaluateChildren(batch);
     }
+    Preconditions.checkState(batch.validateIsNull());
+
     inputColVector = (<InputColumnVectorType>) batch.cols[colNum];
     outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn];
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     <OperandType>[] vector = inputColVector.vector;
@@ -67,7 +74,7 @@ public class <ClassName> extends VectorExpression {
       //Repeating property will not change.
       outputVector[0] = <FuncName>(<OperandCast>vector[0]);
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+      outputIsNull[0] = inputIsNull[0];
       outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
@@ -97,6 +104,8 @@ public class <ClassName> extends VectorExpression {
       outputColVector.isRepeating = false;
     }
+
+    Preconditions.checkState(batch.validateIsNull());
   }
 
   @Override
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt
index b52b7c7..3f8a6aa 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt
@@ -55,6 +55,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     <OperandType>[] vector = inputColVector.vector;
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt
index abee249..84e6884 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt
@@ -61,6 +61,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt
index 93a441a..816472c 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt
@@ -74,6 +74,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt
index 8fa3563..cb89588 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt
@@ -61,6 +61,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt
index 0464a5e..675c5c7 100644
--- ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt
@@ -74,6 +74,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt
index 4fcbdc0..9382e5a 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt
@@ -70,6 +70,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt
index 91887c8..dd83084 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnWithConvert.txt
@@ -72,6 +72,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt
index f8a8457..86694cb 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt
@@ -70,6 +70,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt
index c8a5d17..7fe0f14 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt
@@ -62,6 +62,7 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.reset();
     outputColVector.noNulls = inputColVector.noNulls;
     outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
index 58f1190..e99fd9a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
@@ -31,6 +31,8 @@
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
+import com.google.common.base.Preconditions;
+
 /**
  * Filter operator implementation.
  **/
@@ -90,6 +92,8 @@ public void process(Object row, int tag) throws HiveException {
 
     VectorizedRowBatch vrg = (VectorizedRowBatch) row;
 
+    Preconditions.checkState(vrg.validateIsNull());
+
     //The selected vector represents selected rows.
     //Clone the selected vector
     System.arraycopy(vrg.selected, 0, temporarySelected, 0, vrg.size);
@@ -111,6 +115,14 @@ public void process(Object row, int tag) throws HiveException {
       default:
         // All are selected, do nothing
     }
+
+    int column = vrg.invalidIsNullCol();
+    if (column != -1) {
+      int invalidRow = vrg.cols[column].invalidIsNullRow();
+      throw new RuntimeException("column " + column + " noNulls is true and isNull[" + invalidRow + "] is true after " + conditionEvaluator.toString());
+    }
+    Preconditions.checkState(vrg.validateIsNull());
+
     if (vrg.size > 0) {
       forward(vrg, null);
     }
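Note: every template and operator above now calls outputColVector.reset() before output is written. A minimal sketch of the state reset() is expected to restore, assuming only the ColumnVector fields that appear in this diff (noNulls, isRepeating, isNull); the real method may also re-initialize type-specific buffers, as the later initBuffer() -> reset() replacements in this patch suggest:

    // Sketch only: the post-condition the reset() calls in this patch rely on.
    abstract class ColumnVectorResetSketch {
      boolean noNulls = true;
      boolean isRepeating = false;
      boolean[] isNull = new boolean[1024];    // VectorizedRowBatch.DEFAULT_SIZE

      void reset() {
        noNulls = true;                        // nothing written yet, so no nulls
        isRepeating = false;                   // not repeating until proven so
        java.util.Arrays.fill(isNull, false);  // clear stale isNull entries
      }
    }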
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 0524c08..7e5cb2d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -51,6 +51,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.DataOutputBuffer;
 
+import com.google.common.base.Preconditions;
+
 /**
  * Vectorized GROUP BY operator implementation. Consumes the vectorized input and
  * stores the aggregate operators' intermediate states. Emits row mode output.
@@ -877,9 +879,14 @@ public void endGroup() throws HiveException {
   public void process(Object row, int tag) throws HiveException {
     VectorizedRowBatch batch = (VectorizedRowBatch) row;
 
+    Preconditions.checkState(batch.validateIsNull());
+
     if (batch.size > 0) {
       processingMode.processBatch(batch);
     }
+
+    Preconditions.checkState(batch.validateIsNull());
+
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
index 73b905f..fbddbfc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
@@ -36,6 +36,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 
+import com.google.common.base.Preconditions;
+
 /**
  * Select operator implementation.
  */
@@ -118,15 +120,28 @@ public void process(Object row, int tag) throws HiveException {
     }
 
     VectorizedRowBatch vrg = (VectorizedRowBatch) row;
+
+    Preconditions.checkState(vrg.validateIsNull());
+
     for (int i = 0; i < vExpressions.length; i++) {
       try {
        vExpressions[i].evaluate(vrg);
+
+        int column = vrg.invalidIsNullCol();
+        if (column != -1) {
+          int invalidRow = vrg.cols[column].invalidIsNullRow();
+          throw new RuntimeException("column " + column + " noNulls is true and isNull[" + invalidRow + "] is true after " + vExpressions[i].toString());
+        }
+        Preconditions.checkState(vrg.validateIsNull());
+
      } catch (RuntimeException e) {
        throw new HiveException("Error evaluating " + conf.getColList().get(i).getExprString(), e);
      }
     }
 
+    Preconditions.checkState(vrg.validateIsNull());
+
     // Prepare output, set the projections
     int[] originalProjections = vrg.projectedColumns;
     int originalProjectionSize = vrg.projectionSize;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index 9b90f37..e3ac3f0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -646,11 +646,12 @@ public static String displayBytes(byte[] bytes, int start, int length) {
     return sb.toString();
   }
 
-  public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) {
+  public static String debugOneRowString(VectorizedRowBatch batch, int index,
+      int[] projectedColumns, int projectionSize) {
     StringBuilder sb = new StringBuilder();
-    sb.append(prefix + " row " + index + " ");
-    for (int p = 0; p < batch.projectionSize; p++) {
-      int column = batch.projectedColumns[p];
+    sb.append("row " + index + " ");
+    for (int p = 0; p < projectionSize; p++) {
+      int column = projectedColumns[p];
       if (p == column) {
         sb.append("(col " + p + ") ");
       } else {
@@ -691,7 +692,28 @@ public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, Strin
       }
       sb.append(" ");
     }
-    LOG.info(sb.toString());
+    return sb.toString();
+  }
+
+  public static void debugDisplayOneRow(VectorizedRowBatch batch, int index,
+      int[] projectedColumns, int projectionSize, String prefix) {
+    LOG.info(prefix + " " + debugOneRowString(batch, index, projectedColumns, projectionSize));
+  }
+
+  public static void debugDisplayBatch(VectorizedRowBatch batch,
+      int[] projectedColumns, int projectionSize, String prefix) {
+    for (int i = 0; i < batch.size; i++) {
+      int index = (batch.selectedInUse ? batch.selected[i] : i);
+      debugDisplayOneRow(batch, index, projectedColumns, projectionSize, prefix);
+    }
+  }
+
+  public static String debugOneRowString(VectorizedRowBatch batch, int index) {
+    return debugOneRowString(batch, index, batch.projectedColumns, batch.projectionSize);
+  }
+
+  public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) {
+    LOG.info(prefix + " " + debugOneRowString(batch, index));
   }
 
   public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) {
@@ -700,4 +722,186 @@ public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) {
     debugDisplayOneRow(batch, index, prefix);
     }
   }
+
+  public static String bareOneRowString(VectorizedRowBatch batch, int index,
+      int[] projectedColumns, int projectionSize) {
+    StringBuilder sb = new StringBuilder();
+    for (int p = 0; p < projectionSize; p++) {
+      int column = projectedColumns[p];
+      ColumnVector colVector = batch.cols[column];
+      if (colVector == null) {
+        // sb.append("(null ColumnVector)");
+      } else {
+        boolean isRepeating = colVector.isRepeating;
+        if (isRepeating) {
+          // sb.append("(repeating)");
+        }
+        index = (isRepeating ? 0 : index);
+        if (colVector.noNulls || !colVector.isNull[index]) {
+          if (colVector instanceof LongColumnVector) {
+            sb.append(((LongColumnVector) colVector).vector[index]);
+          } else if (colVector instanceof DoubleColumnVector) {
+            sb.append(((DoubleColumnVector) colVector).vector[index]);
+          } else if (colVector instanceof BytesColumnVector) {
+            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
+            byte[] bytes = bytesColumnVector.vector[index];
+            int start = bytesColumnVector.start[index];
+            int length = bytesColumnVector.length[index];
+            if (bytes == null) {
+              sb.append("(Unexpected null bytes with start " + start + " length " + length + ")");
+            } else {
+              sb.append("bytes: '" + displayBytes(bytes, start, length) + "'");
+            }
+          } else if (colVector instanceof DecimalColumnVector) {
+            sb.append(((DecimalColumnVector) colVector).vector[index].toString());
+          } else {
+            sb.append("Unknown");
+          }
+        } else {
+          sb.append("NULL");
+        }
+      }
+      sb.append(" ");
+    }
+    return sb.toString();
+  }
+
+  public static String bareOneRowString(VectorizedRowBatch batch, int index) {
+    return bareOneRowString(batch, index, batch.projectedColumns, batch.projectionSize);
+  }
+
+  public static void bareDisplayOneRow(VectorizedRowBatch batch, int index,
+      int[] projectedColumns, int projectionSize, String prefix) {
+    LOG.info(prefix + " " + bareOneRowString(batch, index, projectedColumns, projectionSize));
+  }
+
+  public static void bareDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) {
+    LOG.info(prefix + " " + bareOneRowString(batch, index));
+  }
+
+  public static void bareDisplayBatch(VectorizedRowBatch batch,
+      int[] projectedColumns, int projectionSize, String prefix) {
+    for (int i = 0; i < batch.size; i++) {
+      int index = (batch.selectedInUse ? batch.selected[i] : i);
+      bareDisplayOneRow(batch, index, projectedColumns, projectionSize, prefix);
+    }
+  }
+
+  public static void bareDisplayBatch(VectorizedRowBatch batch, String prefix) {
+    for (int i = 0; i < batch.size; i++) {
+      int index = (batch.selectedInUse ? batch.selected[i] : i);
+      bareDisplayOneRow(batch, index, prefix);
+    }
+  }
+
+  public static class DebugBatchNullSummarizer {
+
+    private ColumnVector.Type[] columnVectorTypes;
+
+    private long[] nullCounters;
+    private long[] nonNullCounters;
+    private long[] repeatedColumnCounters;
+
+    private int[] projectedColumns;
+    private int projectionSize;
+
+    private long batchCount;
+    private long rowCount;
+
+    public DebugBatchNullSummarizer() {
+      columnVectorTypes = null;
+      nullCounters = null;
+      nonNullCounters = null;
+      repeatedColumnCounters = null;
+
+      projectedColumns = null;
+      projectionSize = 0;
+      batchCount = 0;
+      rowCount = 0;
+    }
+
+    public void analyze(VectorizedRowBatch batch, String prefix) {
+      if (nullCounters == null) {
+        columnVectorTypes = new ColumnVector.Type[batch.projectionSize];
+        for (int i = 0; i < batch.projectionSize; i++) {
+          int p = batch.projectedColumns[i];
+          ColumnVector colVector = batch.cols[p];
+          ColumnVector.Type type;
+          if (colVector == null) {
+            type = null;
+          } else if (colVector instanceof LongColumnVector) {
+            type = ColumnVector.Type.LONG;
+          } else if (colVector instanceof DoubleColumnVector) {
+            type = ColumnVector.Type.DOUBLE;
+          } else if (colVector instanceof BytesColumnVector) {
+            type = ColumnVector.Type.BYTES;
+          } else if (colVector instanceof DecimalColumnVector) {
+            type = ColumnVector.Type.DECIMAL;
+          } else {
+            throw new RuntimeException("Unknown column vector type " + colVector.getClass().getName());
+          }
+          columnVectorTypes[i] = type;
+        }
+        nullCounters = new long[batch.projectionSize];
+        nonNullCounters = new long[batch.projectionSize];
+        repeatedColumnCounters = new long[batch.projectionSize];
+        projectedColumns = Arrays.copyOf(batch.projectedColumns, batch.projectionSize);
+        projectionSize = batch.projectionSize;
+      }
+
+      boolean selectedInUse = batch.selectedInUse;
+      int[] selected = batch.selected;
+      for (int i = 0; i < batch.projectionSize; i++) {
+        int p = batch.projectedColumns[i];
+        ColumnVector colVector = batch.cols[p];
+        if (colVector == null) {
+          continue;
+        }
+        if (colVector.isRepeating) {
+          repeatedColumnCounters[i]++;
+          if (colVector.noNulls || !colVector.isNull[0]) {
+            nonNullCounters[i] += batch.size;
+          } else {
+            nullCounters[i] += batch.size;
+          }
+        } else if (colVector.noNulls) {
+          nonNullCounters[i] += batch.size;
+        } else {
+          for (int r = 0; r < batch.size; r++) {
+            int adjustedIndex = (selectedInUse ? selected[r] : r);
+            if (colVector.noNulls || !colVector.isNull[adjustedIndex]) {
+              nonNullCounters[i]++;
+            } else {
+              nullCounters[i]++;
+            }
+          }
+        }
+      }
+      batchCount++;
+      rowCount += batch.size;
+    }
+
+    public void displaySummary(String prefix) {
+      StringBuilder sb = new StringBuilder();
+      sb.append(prefix + " ");
+      sb.append("batch count ");
+      sb.append(batchCount);
+      sb.append(", row count ");
+      sb.append(rowCount);
+      for (int i = 0; i < projectionSize; i++) {
+        sb.append(", ");
+        sb.append(columnVectorTypes[i] == null ? "NULL" : columnVectorTypes[i].name());
+        sb.append(" ");
+        sb.append(nonNullCounters[i]);
+        sb.append(":");
+        sb.append(nullCounters[i]);
+        if (repeatedColumnCounters[i] != 0) {
+          sb.append("(repeated ");
+          sb.append(repeatedColumnCounters[i]);
+          sb.append(")");
+        }
+      }
+      LOG.info(sb.toString());
+    }
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
index 8d75cf3..9487090 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
@@ -142,6 +142,7 @@ private void evaluateDecimal(VectorizedRowBatch vrg) {
 
   @Override
   public void evaluate(VectorizedRowBatch vrg) {
+    vrg.cols[outputColumn].reset();
     switch (type) {
     case LONG:
       evaluateLong(vrg);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
index 4b1182c..2d90feb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
@@ -54,7 +54,7 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int n = batch.size;
     BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
-    outV.initBuffer();
+    outV.reset();
 
     if (n == 0) {
       //Nothing to do
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
index 41e3b0f..22212e3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
@@ -21,6 +21,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+import com.google.common.base.Preconditions;
+
 /**
  * This class represents a non leaf binary operator in the expression tree.
  */
@@ -34,10 +36,29 @@ public FilterExprAndExpr() {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
+
+    Preconditions.checkState(batch.validateIsNull());
+
     childExpressions[0].evaluate(batch);
     for (int childIndex = 1; childIndex < childExpressions.length; childIndex++) {
+
+      int column = batch.invalidIsNullCol();
+      if (column != -1) {
+        int invalidRow = batch.cols[column].invalidIsNullRow();
+        throw new RuntimeException("column " + column + " noNulls is true and isNull[" + invalidRow + "] is true after " + childExpressions[childIndex - 1].toString());
+      }
+      Preconditions.checkState(batch.validateIsNull());
+
       childExpressions[childIndex].evaluate(batch);
     }
+
+    int column = batch.invalidIsNullCol();
+    if (column != -1) {
+      int invalidRow = batch.cols[column].invalidIsNullRow();
+      throw new RuntimeException("column " + column + " noNulls is true and isNull[" + invalidRow + "] is true after " + childExpressions[childExpressions.length - 1].toString());
+    }
+    Preconditions.checkState(batch.validateIsNull());
+
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
index dc5139d..c1a022d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
@@ -119,6 +119,8 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }
 
+    Preconditions.checkState(batch.validateIsNull());
+
     VectorExpression childExpr1 = this.childExpressions[0];
 
     boolean prevSelectInUse = batch.selectedInUse;
@@ -137,6 +139,13 @@ public void evaluate(VectorizedRowBatch batch) {
 
     childExpr1.evaluate(batch);
 
+    int column = batch.invalidIsNullCol();
+    if (column != -1) {
+      int invalidRow = batch.cols[column].invalidIsNullRow();
+      throw new RuntimeException("column " + column + " noNulls is true and isNull[" + invalidRow + "] is true after " + childExpr1.toString());
+    }
+    Preconditions.checkState(batch.validateIsNull());
+
     // Preserve the selected reference and size values generated
     // after the first child is evaluated.
     int sizeAfterFirstChild = batch.size;
@@ -180,7 +189,14 @@ public void evaluate(VectorizedRowBatch batch) {
       VectorExpression childExpr = this.childExpressions[childIndex];
 
       childExpr.evaluate(batch);
-
+
+      column = batch.invalidIsNullCol();
+      if (column != -1) {
+        int invalidRow = batch.cols[column].invalidIsNullRow();
+        throw new RuntimeException("column " + column + " noNulls is true and isNull[" + invalidRow + "] is true after " + childExpr.toString());
+      }
+      Preconditions.checkState(batch.validateIsNull());
+
       // Merge the result of last evaluate to previous evaluate.
       newSize += batch.size;
       for (int i = 0; i < batch.size; i++) {
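Note: the checks above lean on three helpers this patch assumes exist, batch.validateIsNull(), batch.invalidIsNullCol() and ColumnVector.invalidIsNullRow(). Their expected semantics, sketched as a free-standing method (the selected/selectedInUse layout is taken from the diff; everything else is illustrative):

    // Returns -1 if the column honors the invariant "noNulls implies no live
    // row has isNull[row] set"; otherwise returns the first offending row.
    static int firstInvalidIsNullRow(boolean noNulls, boolean isRepeating,
        boolean[] isNull, boolean selectedInUse, int[] selected, int size) {
      if (!noNulls) {
        return -1;                      // isNull[] entries may legitimately be set
      }
      int n = isRepeating ? 1 : size;   // a repeating column only uses entry 0
      for (int i = 0; i < n; i++) {
        int row = (!isRepeating && selectedInUse) ? selected[i] : i;
        if (isNull[row]) {
          return row;                   // noNulls claimed, yet a live row is null
        }
      }
      return -1;
    }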
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
index fa0a746..2227f01 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
@@ -57,7 +57,7 @@ public void evaluate(VectorizedRowBatch batch) {
     int n = batch.size;
     long[] vector = inputColVector.vector;
     BytesColumnVector outV = (BytesColumnVector) batch.cols[outputCol];
-    outV.initBuffer();
+    outV.reset();
 
     if (n == 0) {
       //Nothing to do
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
index ca11a55..09d5103 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
@@ -65,13 +65,13 @@ public void evaluate(VectorizedRowBatch batch) {
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;
 
+    outputColVector.reset();
+
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
 
-    outputColVector.initBuffer();
-
     /* All the code paths below propagate nulls even if neither arg2 nor arg3
      * have nulls. This is to reduce the number of code paths and shorten the
      * code, at the expense of maybe doing unnecessary work if neither input
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
index 4e09448..76acbe4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
@@ -66,13 +66,13 @@ public void evaluate(VectorizedRowBatch batch) {
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;
 
+    outputColVector.reset();
+
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
 
-    outputColVector.initBuffer();
-
     /* All the code paths below propagate nulls even if arg2 has no nulls.
      * This is to reduce the number of code paths and shorten the
      * code, at the expense of maybe doing unnecessary work if neither input
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
index 79ed71e..807402f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
@@ -66,13 +66,13 @@ public void evaluate(VectorizedRowBatch batch) {
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;
 
+    outputColVector.reset();
+
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
 
-    outputColVector.initBuffer();
-
     /* All the code paths below propagate nulls even arg3 has no
      * nulls. This is to reduce the number of code paths and shorten the
      * code, at the expense of maybe doing unnecessary work if neither input
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
index 2a35970..f0120a1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
@@ -65,13 +65,13 @@ public void evaluate(VectorizedRowBatch batch) {
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;
 
+    outputColVector.reset();
+
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
 
-    outputColVector.initBuffer();
-
     if (arg1ColVector.isRepeating) {
       if (vector1[0] == 1) {
         outputColVector.fill(arg2Scalar);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
index fbca683..0a2b8cc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
@@ -55,7 +55,7 @@ public void evaluate(VectorizedRowBatch batch) {
     int n = batch.size;
     long[] vector = inputColVector.vector;
     BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
-    outV.initBuffer();
+    outV.reset();
 
     if (n == 0) {
       //Nothing to do
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
index 1cd3c46..084a022 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
@@ -58,15 +58,15 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] start = inputColVector.start;
     int[] length = inputColVector.length;
 
+    // initialize output vector buffer to receive data
+    outV.reset();
+
     if (n == 0) {
 
       // Nothing to do
       return;
     }
 
-    // initialize output vector buffer to receive data
-    outV.initBuffer();
-
     if (inputColVector.noNulls) {
       outV.noNulls = true;
       if (inputColVector.isRepeating) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
index 56bc97b..d1b092d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
@@ -63,14 +63,14 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] start1 = inV1.start;
     int[] start2 = inV2.start;
 
+    // prepare output buffer to accept results
+    outV.reset();
+
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
 
-    // prepare output buffer to accept results
-    outV.initBuffer();
-
     /* Handle default case for isRepeating setting for output. This will be set to true
      * later in the special cases where that is necessary.
      */
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
index b98f72d..a10c328 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
@@ -58,15 +58,15 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] start = inputColVector.start;
     int[] length = inputColVector.length;
 
+    // initialize output vector buffer to receive data
+    outV.reset();
+
     if (n == 0) {
 
       // Nothing to do
       return;
     }
 
-    // initialize output vector buffer to receive data
-    outV.initBuffer();
-
     if (inputColVector.noNulls) {
       outV.noNulls = true;
       if (inputColVector.isRepeating) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
index 75a99f0..3e95b6a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
@@ -122,6 +122,8 @@ public void evaluate(VectorizedRowBatch batch) {
     BytesColumnVector inV = (BytesColumnVector) batch.cols[colNum];
     BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
 
+    outV.reset();
+
     int n = batch.size;
 
     if (n == 0) {
@@ -132,7 +134,6 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int[] len = inV.length;
     int[] start = inV.start;
-    outV.initBuffer();
 
     if (inV.isRepeating) {
       outV.isRepeating = true;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
index 0ff7af6..76e0511 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
@@ -140,6 +140,7 @@ public void evaluate(VectorizedRowBatch batch) {
     BytesColumnVector inV = (BytesColumnVector) batch.cols[colNum];
     BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
+    outV.reset();
 
     int n = batch.size;
 
@@ -151,7 +152,6 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int[] len = inV.length;
     int[] start = inV.start;
-    outV.initBuffer();
 
     if (inV.isRepeating) {
       outV.isRepeating = true;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
index 016a695..af439d4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
@@ -68,7 +68,7 @@ public void evaluate(VectorizedRowBatch batch) {
     int [] start = inputColVector.start;
     int [] length = inputColVector.length;
     BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
-    outV.initBuffer();
+    outV.reset();
     Text t;
 
     if (n == 0) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
index 89ef251..74a88a0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
@@ -57,7 +57,7 @@ public void evaluate(VectorizedRowBatch batch) {
     int start[] = inputColVector.start;
     int length[] = inputColVector.length;
     BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
-    outV.initBuffer();
+    outV.reset();
 
     if (n == 0) {
       //Nothing to do
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
index c0e4cf0..bc89b35 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
@@ -21,7 +21,9 @@
 import java.io.Serializable;
 import java.util.Map;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableMap;
+
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -117,8 +119,16 @@ public void setChildExpressions(VectorExpression [] ve) {
    */
  final protected void evaluateChildren(VectorizedRowBatch vrg) {
    if (childExpressions != null) {
+      Preconditions.checkState(vrg.validateIsNull());
      for (VectorExpression ve : childExpressions) {
        ve.evaluate(vrg);
+
+        int column = vrg.invalidIsNullCol();
+        if (column != -1) {
+          int invalidRow = vrg.cols[column].invalidIsNullRow();
+          throw new RuntimeException("column " + column + " noNulls is true and isNull[" + invalidRow + "] is true after " + ve.toString());
+        }
+
      }
    }
  }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
index c1c137b..7204409 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
@@ -51,6 +51,8 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 
+import com.google.common.base.Preconditions;
+
 /**
 * This class has methods for generating vectorized join results and forwarding batchs.
 *
@@ -104,6 +106,40 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException {
 
   //------------------------------------------------------------------------------------------------
 
+  protected boolean verifyOutputColumnReset(ColumnVector colVector, String title, int column) {
+    if (colVector.isRepeating) {
+      Preconditions.checkState(false, title + " " + column + " isRepeating is true");
+    }
+    if (!colVector.noNulls) {
+      Preconditions.checkState(false, title + " " + column + " noNulls is false");
+    }
+    boolean[] isNull = colVector.isNull;
+    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+      if (isNull[i]) {
+        Preconditions.checkState(false, title + " " + column + " isNull[" + i + "] is true");
+      }
+    }
+    return true;
+  }
+
+  protected boolean verifyOutputColumnsReset(VectorizedRowBatch batch) {
+
+    // For join operators that can generate small table results, verify their
+    // (target) scratch columns.
+
+    for (int column : smallTableOutputVectorColumns) {
+      Preconditions.checkState(verifyOutputColumnReset(batch.cols[column], "small table column", column));
+    }
+
+    for (int column : bigTableOuterKeyOutputVectorColumns) {
+      Preconditions.checkState(verifyOutputColumnReset(batch.cols[column], "big table outer join key", column));
+    }
+
+    return true;
+  }
+
+  //------------------------------------------------------------------------------------------------
+
   protected void performValueExpressions(VectorizedRowBatch batch,
       int[] allMatchs, int allMatchCount) {
 
     /*
@@ -582,6 +618,9 @@ protected void reProcessBigTable(int partitionId)
    */
   public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException {
 
+    // Make sure MapJoin didn't corrupt the batch.
+    Preconditions.checkState(batch.validateIsNull());
+
     // Save original projection.
     int[] originalProjections = batch.projectedColumns;
     int originalProjectionSize = batch.projectionSize;
@@ -595,6 +634,9 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException
 
     // Revert the projected columns back, because batch can be re-used by our parent operators.
     batch.projectionSize = originalProjectionSize;
     batch.projectedColumns = originalProjections;
+
+    // Make sure some downstream operator didn't corrupt the batch.
+    Preconditions.checkState(batch.validateIsNull());
   }
 
 
@@ -602,7 +644,15 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException
    * Forward the overflow batch and reset the batch.
    */
   protected void forwardOverflow() throws HiveException {
+
+    // Make sure MapJoin didn't corrupt the batch.
+    Preconditions.checkState(overflowBatch.validateIsNull());
+
     forward(overflowBatch, null);
+
+    // Make sure some downstream operator didn't corrupt the batch.
+    Preconditions.checkState(overflowBatch.validateIsNull());
+
     overflowBatch.reset();
   }
 
@@ -610,7 +660,14 @@ protected void forwardOverflow() throws HiveException {
    * Forward the overflow batch, but do not reset the batch.
    */
   private void forwardOverflowNoReset() throws HiveException {
+
+    // Make sure MapJoin didn't corrupt the batch.
+    Preconditions.checkState(overflowBatch.validateIsNull());
+
     forward(overflowBatch, null);
+
+    // Make sure some downstream operator didn't corrupt the batch.
+    Preconditions.checkState(overflowBatch.validateIsNull());
   }
 
   /*
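Note: the four-line invalidIsNullCol()/invalidIsNullRow() block now appears verbatim in VectorFilterOperator, VectorSelectOperator, FilterExprAndExpr, FilterExprOrExpr and VectorExpression.evaluateChildren. A hypothetical helper, not part of this patch, that would reduce each call site to one line:

    // Hypothetical consolidation of the repeated check; invalidIsNullCol() and
    // invalidIsNullRow() are the helpers this patch assumes on batch/column.
    static void throwIfInvalidIsNull(VectorizedRowBatch batch, Object lastEvaluated) {
      int column = batch.invalidIsNullCol();
      if (column != -1) {
        int invalidRow = batch.cols[column].invalidIsNullRow();
        throw new RuntimeException("column " + column + " noNulls is true and isNull["
            + invalidRow + "] is true after " + lastEvaluated.toString());
      }
    }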
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
index 6b33a39..e029408 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
@@ -32,6 +32,8 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
+import com.google.common.base.Preconditions;
+
 /**
 * This class has methods for generating vectorized join results for the big table only
 * variation of inner joins.
@@ -147,6 +149,8 @@ protected void finishInnerBigOnly(VectorizedRowBatch batch,
       VectorMapJoinHashTableResult[] hashTableResults, int hashMapResultCount)
           throws HiveException, IOException {
 
+    Preconditions.checkState(batch.validateIsNull());
+
     // Get rid of spills before we start modifying the batch.
     if (spillCount > 0) {
       spillHashMapBatch(batch, hashTableResults,
@@ -158,6 +162,9 @@ protected void finishInnerBigOnly(VectorizedRowBatch batch,
      */
     if (allMatchCount > 0 && bigTableValueExpressions != null) {
       performValueExpressions(batch, allMatchs, allMatchCount);
+
+      Preconditions.checkState(batch.validateIsNull());
+
     }
 
     int numSel = 0;
@@ -177,6 +184,10 @@ protected void finishInnerBigOnly(VectorizedRowBatch batch,
     }
     batch.size = numSel;
     batch.selectedInUse = true;
+
+    Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+
+    Preconditions.checkState(batch.validateIsNull());
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
index 9e77d22..a3e9ac2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
@@ -37,6 +37,8 @@
 
 // Single-Column Long specific imports.
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 
+import com.google.common.base.Preconditions;
+
 /*
 * Specialized class for doing a vectorized map join that is an inner join on a Single-Column Long
 * and only big table columns appear in the join result so a hash multi-set is used.
@@ -127,6 +129,8 @@ public void process(Object row, int tag) throws HiveException {
 
         // Do the per-batch setup for an inner big-only join.
 
+        Preconditions.checkState(batch.validateIsNull());
+
         // (Currently none)
         // innerBigOnlyPerBatchSetup(batch);
@@ -144,6 +148,10 @@ public void process(Object row, int tag) throws HiveException {
           return;
         }
 
+        if (batch.selectedInUse) {
+          Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+        }
+
         // Perform any key expressions. Results will go into scratch columns.
         if (bigTableKeyExpressions != null) {
           for (VectorExpression ve : bigTableKeyExpressions) {
@@ -200,6 +208,9 @@ public void process(Object row, int tag) throws HiveException {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
         finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
+
+        Preconditions.checkState(batch.validateIsNull());
+
       } else {
 
         /*
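Note: verifyMonotonicallyIncreasing, used in the checkState guards here and in the operators below, is assumed to confirm that the selected array holds strictly increasing (hence unique) row indices. A sketch of that predicate:

    // Sketch: true iff selected[0..size) is strictly increasing, the contract
    // every join result generator in this patch depends on.
    static boolean verifyMonotonicallyIncreasing(int[] selected, int size) {
      for (int i = 1; i < size; i++) {
        if (selected[i - 1] >= selected[i]) {
          return false;  // duplicate or out-of-order row index
        }
      }
      return true;
    }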
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
index e4f6c5d..5e1809f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
@@ -39,6 +39,8 @@
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
 
+import com.google.common.base.Preconditions;
+
 /*
 * Specialized class for doing a vectorized map join that is an inner join on Multi-Key
 * and only big table columns appear in the join result so a hash multi-set is used.
@@ -132,6 +134,8 @@ public void process(Object row, int tag) throws HiveException {
 
         // Do the per-batch setup for an inner big-only join.
 
+        Preconditions.checkState(batch.validateIsNull());
+
         // (Currently none)
         // innerBigOnlyPerBatchSetup(batch);
@@ -149,6 +153,10 @@ public void process(Object row, int tag) throws HiveException {
           return;
         }
 
+        if (batch.selectedInUse) {
+          Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+        }
+
         // Perform any key expressions. Results will go into scratch columns.
         if (bigTableKeyExpressions != null) {
           for (VectorExpression ve : bigTableKeyExpressions) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
index 2711b10..22bb685 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
@@ -38,6 +38,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 
+import com.google.common.base.Preconditions;
+
 /*
 * Specialized class for doing a vectorized map join that is an inner join on a Single-Column String
 * and only big table columns appear in the join result so a hash multi-set is used.
@@ -118,6 +120,8 @@ public void process(Object row, int tag) throws HiveException {
 
         // Do the per-batch setup for an inner big-only join.
 
+        Preconditions.checkState(batch.validateIsNull());
+
         // (Currently none)
         // innerBigOnlyPerBatchSetup(batch);
@@ -135,6 +139,10 @@ public void process(Object row, int tag) throws HiveException {
           return;
         }
 
+        if (batch.selectedInUse) {
+          Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+        }
+
         // Perform any key expressions. Results will go into scratch columns.
         if (bigTableKeyExpressions != null) {
           for (VectorExpression ve : bigTableKeyExpressions) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
index 36d0611..5a1584d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
@@ -33,6 +33,8 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
+import com.google.common.base.Preconditions;
+
 /**
 * This class has methods for generating vectorized join results for inner joins.
 *
@@ -192,6 +194,8 @@ protected void finishInner(VectorizedRowBatch batch,
 
     batch.size = numSel;
     batch.selectedInUse = true;
+
+    Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
   }
 
   protected void finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
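Note on the Preconditions.checkState(verifyX(...)) idiom used throughout this patch: the verify methods return true on success (verifyOutputColumnReset above fails internally via checkState), so the wrapper both documents the invariant and gives a single place to disable the per-batch O(n) validation. An illustrative guard, with a hypothetical flag that is not part of the patch:

    // Illustrative only; DEBUG_CHECKS is a hypothetical flag, not in this patch.
    private static final boolean DEBUG_CHECKS = false;

    void processGuarded(VectorizedRowBatch batch) {
      if (DEBUG_CHECKS) {
        com.google.common.base.Preconditions.checkState(batch.validateIsNull());
      }
      // ... normal per-batch processing ...
    }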
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
index 0197225..bbe33c2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
@@ -36,6 +36,8 @@
 
 // Single-Column Long specific imports.
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 
+import com.google.common.base.Preconditions;
+
 /*
 * Specialized class for doing a vectorized map join that is an inner join on a Single-Column Long
 * using a hash map.
@@ -126,6 +128,8 @@ public void process(Object row, int tag) throws HiveException {
 
         // Do the per-batch setup for an inner join.
 
+        Preconditions.checkState(batch.validateIsNull());
+
         innerPerBatchSetup(batch);
 
         // For inner joins, we may apply the filter(s) now.
@@ -142,6 +146,10 @@ public void process(Object row, int tag) throws HiveException {
           return;
         }
 
+        if (batch.selectedInUse) {
+          Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+        }
+
         // Perform any key expressions. Results will go into scratch columns.
         if (bigTableKeyExpressions != null) {
           for (VectorExpression ve : bigTableKeyExpressions) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
index 837d97b..df21a70 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
@@ -38,6 +38,8 @@
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
 
+import com.google.common.base.Preconditions;
+
 /*
 * Specialized class for doing a vectorized map join that is an inner join on a Multi-Key
 * using a hash map.
@@ -130,6 +132,8 @@ public void process(Object row, int tag) throws HiveException {
 
         // Do the per-batch setup for an inner join.
 
+        Preconditions.checkState(batch.validateIsNull());
+
         innerPerBatchSetup(batch);
 
         // For inner joins, we may apply the filter(s) now.
@@ -146,6 +150,10 @@ public void process(Object row, int tag) throws HiveException {
           return;
         }
 
+        if (batch.selectedInUse) {
+          Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+        }
+
         // Perform any key expressions. Results will go into scratch columns.
         if (bigTableKeyExpressions != null) {
           for (VectorExpression ve : bigTableKeyExpressions) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
index b2711c3..b2020a6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
@@ -37,6 +37,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 
+import com.google.common.base.Preconditions;
+
 /*
 * Specialized class for doing a vectorized map join that is an inner join on a Single-Column String
 * using a hash map.
@@ -117,6 +119,8 @@ public void process(Object row, int tag) throws HiveException {
 
         // Do the per-batch setup for an inner join.
 
+        Preconditions.checkState(batch.validateIsNull());
+
         innerPerBatchSetup(batch);
 
         // For inner joins, we may apply the filter(s) now.
@@ -133,6 +137,10 @@ public void process(Object row, int tag) throws HiveException {
           return;
         }
 
+        if (batch.selectedInUse) {
+          Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+        }
+
         // Perform any key expressions. Results will go into scratch columns.
         if (bigTableKeyExpressions != null) {
           for (VectorExpression ve : bigTableKeyExpressions) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
index d1d6c42..58ad14a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
@@ -32,6 +32,8 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
+import com.google.common.base.Preconditions;
+
 /**
 * This class has methods for generating vectorized join results for left semi joins.
 *
@@ -139,6 +141,8 @@ protected void finishLeftSemi(VectorizedRowBatch batch,
 
     int numSel = generateHashSetResults(batch, allMatchs, allMatchCount);
     batch.size = numSel;
     batch.selectedInUse = true;
+
+    Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
index 4b8ab58..f8e8db0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
@@ -37,6 +37,8 @@
 
 // Single-Column Long specific imports.
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 
+import com.google.common.base.Preconditions;
+
 /*
 * Specialized class for doing a vectorized map join that is an left semi join on a Single-Column Long
 * using a hash set.
@@ -127,6 +129,8 @@ public void process(Object row, int tag) throws HiveException {
 
         // Do the per-batch setup for an left semi join.
 
+        Preconditions.checkState(batch.validateIsNull());
+
         // (Currently none)
         // leftSemiPerBatchSetup(batch);
@@ -144,6 +148,10 @@ public void process(Object row, int tag) throws HiveException {
           return;
         }
 
+        if (batch.selectedInUse) {
+          Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+        }
+
         // Perform any key expressions. Results will go into scratch columns.
         if (bigTableKeyExpressions != null) {
           for (VectorExpression ve : bigTableKeyExpressions) {
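Note: the DebugBatchNullSummarizer added to VectorizedBatchUtil earlier in this patch accumulates per-column null/non-null/repeating counts across batches and logs one summary line. A hedged usage sketch; the operator field and call sites are illustrative, only analyze() and displaySummary() come from the patch:

    // Hypothetical wiring inside a vectorized operator.
    private final VectorizedBatchUtil.DebugBatchNullSummarizer summarizer =
        new VectorizedBatchUtil.DebugBatchNullSummarizer();

    public void processForDebug(VectorizedRowBatch batch) {
      summarizer.analyze(batch, "process");          // tally nulls for this batch
    }

    public void closeForDebug() {
      summarizer.displaySummary("operator totals");  // one LOG.info summary line
    }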
+    Preconditions.checkState(batch.validateIsNull());
+
     // (Currently none)
     // leftSemiPerBatchSetup(batch);
 
@@ -148,6 +152,10 @@ public void process(Object row, int tag) throws HiveException {
       return;
     }
 
+    if (batch.selectedInUse) {
+      Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+    }
+
     // Perform any key expressions. Results will go into scratch columns.
     if (bigTableKeyExpressions != null) {
       for (VectorExpression ve : bigTableKeyExpressions) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
index a8d3459..12c7c5d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
@@ -38,6 +38,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 
+import com.google.common.base.Preconditions;
+
 /*
  * Specialized class for doing a vectorized map join that is a left semi join on a Single-Column String
  * using a hash set.
@@ -118,6 +120,8 @@ public void process(Object row, int tag) throws HiveException {
 
     // Do the per-batch setup for a left semi join.
 
+    Preconditions.checkState(batch.validateIsNull());
+
     // (Currently none)
     // leftSemiPerBatchSetup(batch);
 
@@ -135,6 +139,10 @@ public void process(Object row, int tag) throws HiveException {
       return;
     }
 
+    if (batch.selectedInUse) {
+      Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+    }
+
     // Perform any key expressions. Results will go into scratch columns.
     if (bigTableKeyExpressions != null) {
       for (VectorExpression ve : bigTableKeyExpressions) {
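Note: all of these validators return boolean (or return true at the end) precisely so they can sit inside a single Preconditions.checkState(...) call. One behavioral point worth knowing: unlike Java's assert, Guava's checkState is never disabled at runtime, so every batch pays for these walks in production too. The same boolean-returning shape also works under assert if that cost matters; an illustrative sketch (BatchChecks is a made-up name, not patch code):

  import com.google.common.base.Preconditions;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

  final class BatchChecks {
    // Hypothetical helper: same validation, two enforcement strengths.
    static void checkBatch(VectorizedRowBatch batch) {
      // Always on: throws IllegalStateException when the validator fails.
      Preconditions.checkState(batch.validateIsNull());
      // Opt-in: evaluated only when the JVM is started with -ea.
      assert batch.validateIsNull();
    }
  }
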
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
index 5a88784..fe63c51 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
@@ -33,6 +33,8 @@
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
 
+import com.google.common.base.Preconditions;
+
 /**
  * This class has methods for generating vectorized join results for outer joins.
  *
@@ -263,9 +265,13 @@ private void doValueExpr(VectorizedRowBatch batch,
   private int subtractFromInputSelected(boolean inputSelectedInUse, int inputLogicalSize,
       int[] remove, int removeSize, int[] difference) throws HiveException {
 
-    // if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
-    //   throw new HiveException("remove is not in sort order and unique");
-    // }
+    Preconditions.checkState((inputSelected != remove) && (remove != difference) && (difference != inputSelected));
+
+    if (inputSelectedInUse) {
+      Preconditions.checkState(verifyMonotonicallyIncreasing(inputSelected, inputLogicalSize));
+    }
+
+    Preconditions.checkState(verifyMonotonicallyIncreasing(remove, removeSize));
 
     int differenceCount = 0;
 
@@ -294,9 +300,7 @@ private int subtractFromInputSelected(boolean inputSelectedInUse, int inputLogic
       throw new HiveException("Not all batch indices removed");
     }
 
-    // if (!verifyMonotonicallyIncreasing(difference, differenceCount)) {
-    //   throw new HiveException("difference is not in sort order and unique");
-    // }
+    Preconditions.checkState(verifyMonotonicallyIncreasing(difference, differenceCount));
 
     return differenceCount;
   }
@@ -323,9 +327,12 @@ private int subtractFromInputSelected(boolean inputSelectedInUse, int inputLogic
   private int subtract(int[] all, int allSize,
       int[] remove, int removeSize, int[] difference) throws HiveException {
 
-    // if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
-    //   throw new HiveException("remove is not in sort order and unique");
-    // }
+
+    Preconditions.checkState((all != remove) && (remove != difference) && (difference != all));
+
+    Preconditions.checkState(verifyMonotonicallyIncreasing(all, allSize));
+
+    Preconditions.checkState(verifyMonotonicallyIncreasing(remove, removeSize));
 
     int differenceCount = 0;
 
@@ -344,6 +351,8 @@ private int subtract(int[] all, int allSize,
       throw new HiveException("Not all batch indices removed");
     }
 
+    Preconditions.checkState(verifyMonotonicallyIncreasing(difference, differenceCount));
+
     return differenceCount;
   }
 
@@ -363,14 +372,12 @@ private int subtract(int[] all, int allSize,
   private int sortMerge(int[] selected1, int selected1Count,
       int[] selected2, int selected2Count, int[] sortMerged) throws HiveException {
 
-    // if (!verifyMonotonicallyIncreasing(selected1, selected1Count)) {
-    //   throw new HiveException("selected1 is not in sort order and unique");
-    // }
-    // if (!verifyMonotonicallyIncreasing(selected2, selected2Count)) {
-    //   throw new HiveException("selected1 is not in sort order and unique");
-    // }
+    Preconditions.checkState((selected1 != selected2) && (selected2 != sortMerged) && (sortMerged != selected1));
 
+    Preconditions.checkState(verifyMonotonicallyIncreasing(selected1, selected1Count));
+
+    Preconditions.checkState(verifyMonotonicallyIncreasing(selected2, selected2Count));
 
     int sortMergeCount = 0;
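Note: the reference-inequality checks added above (inputSelected != remove, and so on) rule out aliasing: each helper streams over its input arrays while writing the output array, which is only safe when the buffers are distinct. The helpers themselves are classic linear merges over strictly increasing index arrays: the subtract variants compute a set difference and sortMerge a union. A standalone sketch of the merge contract (the real operator merges disjoint allMatchs/noMatchs sets, but handling the equal case keeps the sketch total):

  // Union of two strictly increasing index arrays; the result is again
  // strictly increasing. E.g. {1,4,7} and {2,4,9} merge to {1,2,4,7,9}.
  static int sortMergeSketch(int[] a, int aCount, int[] b, int bCount, int[] out) {
    int i = 0, j = 0, n = 0;
    while (i < aCount && j < bCount) {
      if (a[i] < b[j]) {
        out[n++] = a[i++];
      } else if (a[i] > b[j]) {
        out[n++] = b[j++];
      } else {
        out[n++] = a[i++];  // common index emitted once, keeping output unique
        j++;
      }
    }
    while (i < aCount) { out[n++] = a[i++]; }
    while (j < bCount) { out[n++] = b[j++]; }
    return n;
  }
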
@@ -390,13 +397,164 @@
       }
     }
 
-    // if (!verifyMonotonicallyIncreasing(sortMerged, sortMergeCount)) {
-    //   throw new HiveException("sortMerged is not in sort order and unique");
-    // }
+    Preconditions.checkState(verifyMonotonicallyIncreasing(sortMerged, sortMergeCount));
 
     return sortMergeCount;
   }
 
+  private boolean validateCoverage(boolean inputSelectedInUse, int inputLogicalSize, int spillCount,
+      int allMatchCount, int noMatchCount) {
+
+    Preconditions.checkState(inputLogicalSize >= 0);
+    Preconditions.checkState(inputLogicalSize <= VectorizedRowBatch.DEFAULT_SIZE);
+    if (inputSelectedInUse) {
+      Preconditions.checkState(verifyMonotonicallyIncreasing(inputSelected, inputLogicalSize));
+    }
+    Preconditions.checkState(spillCount >= 0);
+    Preconditions.checkState(verifyMonotonicallyIncreasing(spills, spillCount));
+
+    Preconditions.checkState(allMatchCount >= 0);
+    Preconditions.checkState(verifyMonotonicallyIncreasing(allMatchs, allMatchCount));
+
+    Preconditions.checkState(noMatchCount >= 0);
+    Preconditions.checkState(verifyMonotonicallyIncreasing(noMatchs, noMatchCount));
+
+    int allCount = spillCount + allMatchCount + noMatchCount;
+
+    Preconditions.checkState(inputLogicalSize == allCount);
+
+    boolean[] allValidate = new boolean[VectorizedRowBatch.DEFAULT_SIZE];
+
+    int batchIndex;
+
+    for (int i = 0; i < spillCount; i++) {
+      batchIndex = spills[i];
+      Preconditions.checkState(!allValidate[batchIndex]);
+      allValidate[batchIndex] = true;
+    }
+
+    for (int i = 0; i < allMatchCount; i++) {
+      batchIndex = allMatchs[i];
+      Preconditions.checkState(!allValidate[batchIndex]);
+      allValidate[batchIndex] = true;
+    }
+
+    for (int i = 0; i < noMatchCount; i++) {
+      batchIndex = noMatchs[i];
+      Preconditions.checkState(!allValidate[batchIndex]);
+      allValidate[batchIndex] = true;
+    }
+
+    if (inputSelectedInUse) {
+      for (int i = 0; i < inputLogicalSize; i++) {
+        batchIndex = inputSelected[i];
+        Preconditions.checkState(allValidate[batchIndex]);
+      }
+    } else {
+      for (int i = 0; i < inputLogicalSize; i++) {
+        Preconditions.checkState(allValidate[i]);
+      }
+    }
+
+    return true;
+  }
+
+  private boolean validateOuterResult(VectorizedRowBatch batch,
+      boolean inputSelectedInUse, int inputLogicalSize, int spillCount,
+      int allMatchCount, int equalKeySeriesCount, int noMatchCount) {
+
+    // Assume validateCoverage called.
+
+    boolean[] expectedRows = new boolean[VectorizedRowBatch.DEFAULT_SIZE];
+
+    int numSel = 0;
+    for (int i = 0; i < equalKeySeriesCount; i++) {
+      int allMatchesIndex = equalKeySeriesAllMatchIndices[i];
+      boolean isSingleValue = equalKeySeriesIsSingleValue[i];
+      int duplicateCount = equalKeySeriesDuplicateCounts[i];
+
+      if (isSingleValue) {
+        for (int s = 0; s < duplicateCount; s++) {
+          int batchIndex = allMatchs[allMatchesIndex + s];
+          Preconditions.checkState(!expectedRows[batchIndex]);
+          expectedRows[batchIndex] = true;
+          numSel++;
+        }
+      }
+    }
+
+    for (int i = 0; i < noMatchCount; i++) {
+      int batchIndex = noMatchs[i];
+      Preconditions.checkState(!expectedRows[batchIndex]);
+      expectedRows[batchIndex] = true;
+      numSel++;
+    }
+
+    Preconditions.checkState(batch.size == numSel);
+    if (numSel > 0) {
+      Preconditions.checkState(batch.selectedInUse);
+      Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, numSel));
+      for (int i = 0; i < numSel; i++) {
+        int batchIndex = batch.selected[i];
+        Preconditions.checkState(expectedRows[batchIndex]);
+      }
+    }
+
+    for (int i = 0; i < noMatchCount; i++) {
+      int batchIndex = noMatchs[i];
+
+      for (int column : bigTableOuterKeyOutputVectorColumns) {
+        ColumnVector colVector = batch.cols[column];
+        int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
+        Preconditions.checkState(!colVector.noNulls);
+        Preconditions.checkState(colVector.isNull[adjustedIndex]);
+      }
+
+      // Small table values are set to null.
+      for (int column : smallTableOutputVectorColumns) {
+        ColumnVector colVector = batch.cols[column];
+        int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
+        Preconditions.checkState(!colVector.noNulls);
+        Preconditions.checkState(colVector.isNull[adjustedIndex]);
+      }
+    }
+
+    for (int i = 0; i < equalKeySeriesCount; i++) {
+      int allMatchesIndex = equalKeySeriesAllMatchIndices[i];
+      boolean isSingleValue = equalKeySeriesIsSingleValue[i];
+      int duplicateCount = equalKeySeriesDuplicateCounts[i];
+
+      if (isSingleValue) {
+        for (int s = 0; s < duplicateCount; s++) {
+          int batchIndex = allMatchs[allMatchesIndex + s];
+
+          for (int column : bigTableOuterKeyOutputVectorColumns) {
+            ColumnVector colVector = batch.cols[column];
+            int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
+            Preconditions.checkState(
+                (colVector.noNulls && !colVector.isNull[adjustedIndex]) ||
+                !colVector.noNulls);
+          }
+
+          // Small table values are set to null.
+          for (int column : smallTableOutputVectorColumns) {
+            ColumnVector colVector = batch.cols[column];
+            int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
+            Preconditions.checkState(
+                (colVector.noNulls && !colVector.isNull[adjustedIndex]) ||
+                !colVector.noNulls);
+          }
+        }
+      }
+    }
+
+    if (batch.selectedInUse) {
+      Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
+    }
+
+    return true;
+  }
+
   /**
    * Generate the outer join output results for one vectorized row batch.
    *
@@ -474,6 +632,10 @@ public void finishOuter(VectorizedRowBatch batch,
       }
     }
 
+    Preconditions.checkState(
+        validateCoverage(inputSelectedInUse, inputLogicalSize, spillCount,
+            allMatchCount, noMatchCount));
+
     // When we generate results into the overflow batch, we may still end up with fewer rows
     // in the big table batch. So, numSel and the batch's selected array will be rebuilt with
     // just the big table rows that need to be forwarded, minus any rows processed with the
@@ -543,6 +705,11 @@ public void finishOuter(VectorizedRowBatch batch,
         }
       }
     }
+
+    Preconditions.checkState(
+        validateOuterResult(batch,
+            inputSelectedInUse, inputLogicalSize, spillCount,
+            allMatchCount, equalKeySeriesCount, noMatchCount));
   }
 
   /**
@@ -570,6 +737,8 @@ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs,
       // key as null, too.
       for (int column : bigTableOuterKeyOutputVectorColumns) {
         ColumnVector colVector = batch.cols[column];
+        Preconditions.checkState(!colVector.isRepeating);
+        Preconditions.checkState(!colVector.isNull[batchIndex]);
         colVector.noNulls = false;
         colVector.isNull[batchIndex] = true;
       }
@@ -577,6 +746,8 @@ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs,
       // Small table values are set to null.
       for (int column : smallTableOutputVectorColumns) {
         ColumnVector colVector = batch.cols[column];
+        Preconditions.checkState(!colVector.isRepeating);
+        Preconditions.checkState(!colVector.isNull[batchIndex]);
         colVector.noNulls = false;
         colVector.isNull[batchIndex] = true;
       }
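Note: the generateOuterNulls hunks above make the two ColumnVector writing conventions explicit. First, a row is marked null only by flipping both fields together, noNulls = false and isNull[row] = true, which is exactly the pairing validateIsNull() cross-checks later. Second, a repeating vector keeps its single logical value and null bit at entry 0, hence the adjustedIndex computation in validateOuterResult. Both conventions in one illustrative sketch (helper names are made up):

  import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

  final class NullConventions {
    // Writing: both fields must change together or the batch fails validation.
    static void setNullRow(ColumnVector colVector, int batchIndex) {
      colVector.noNulls = false;
      colVector.isNull[batchIndex] = true;
    }

    // Reading: a repeating vector stores everything at entry 0.
    static boolean isNullRow(ColumnVector colVector, int batchIndex) {
      int adjustedIndex = colVector.isRepeating ? 0 : batchIndex;
      return !colVector.noNulls && colVector.isNull[adjustedIndex];
    }
  }
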
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
index 5b687fd..ba9963c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
@@ -38,6 +38,8 @@
 // Single-Column Long specific imports.
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 
+import com.google.common.base.Preconditions;
+
 /*
  * Specialized class for doing a vectorized map join that is an outer join on a Single-Column Long
  * using a hash map.
@@ -136,6 +138,8 @@ public void process(Object row, int tag) throws HiveException {
 
     // Do the per-batch setup for an outer join.
 
+    Preconditions.checkState(batch.validateIsNull());
+
     outerPerBatchSetup(batch);
 
     // For outer join, remember our input rows before ON expression filtering or before
@@ -143,9 +147,7 @@ public void process(Object row, int tag) throws HiveException {
     // later.
     boolean inputSelectedInUse = batch.selectedInUse;
     if (inputSelectedInUse) {
-      // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
-      //   throw new HiveException("batch.selected is not in sort order and unique");
-      // }
+      Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
       System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
     }
 
@@ -390,9 +392,6 @@ public void process(Object row, int tag) throws HiveException {
           break;
         }
       }
-      // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
-      //   throw new HiveException("allMatchs is not in sort order and unique");
-      // }
     }
   }
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
index e212a2a..d67e3bf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
@@ -39,6 +39,8 @@
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
 
+import com.google.common.base.Preconditions;
+
 /*
  * Specialized class for doing a vectorized map join that is an outer join on Multi-Key
  * using a hash map.
@@ -139,6 +141,8 @@ public void process(Object row, int tag) throws HiveException {
 
     // Do the per-batch setup for an outer join.
 
+    Preconditions.checkState(batch.validateIsNull());
+
     outerPerBatchSetup(batch);
 
     // For outer join, remember our input rows before ON expression filtering or before
@@ -146,9 +150,7 @@ public void process(Object row, int tag) throws HiveException {
     // later.
     boolean inputSelectedInUse = batch.selectedInUse;
     if (inputSelectedInUse) {
-      // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
-      //   throw new HiveException("batch.selected is not in sort order and unique");
-      // }
+      Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
       System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
     }
 
@@ -406,9 +408,6 @@ public void process(Object row, int tag) throws HiveException {
           break;
         }
       }
-      // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
-      //   throw new HiveException("allMatchs is not in sort order and unique");
-      // }
     }
   }
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
index e4107ff..cbd59c0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
@@ -37,6 +37,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 
+import com.google.common.base.Preconditions;
+
 /*
  * Specialized class for doing a vectorized map join that is an outer join on a Single-Column String
  * using a hash map.
@@ -126,6 +128,8 @@ public void process(Object row, int tag) throws HiveException {
 
     // Do the per-batch setup for an outer join.
 
+    Preconditions.checkState(batch.validateIsNull());
+
     outerPerBatchSetup(batch);
 
     // For outer join, remember our input rows before ON expression filtering or before
@@ -133,9 +137,7 @@ public void process(Object row, int tag) throws HiveException {
     // later.
     boolean inputSelectedInUse = batch.selectedInUse;
     if (inputSelectedInUse) {
-      // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
-      //   throw new HiveException("batch.selected is not in sort order and unique");
-      // }
+      Preconditions.checkState(verifyMonotonicallyIncreasing(batch.selected, batch.size));
       System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
     }
 
@@ -376,9 +378,6 @@ public void process(Object row, int tag) throws HiveException {
           break;
         }
       }
-      // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
-      //   throw new HiveException("allMatchs is not in sort order and unique");
-      // }
     }
   }
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
index d3a0f9f..9570882 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
@@ -120,11 +120,7 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int n = batch.size;
     ColumnVector outV = batch.cols[outputColumn];
-
-    // If the output column is of type string, initialize the buffer to receive data.
-    if (outV instanceof BytesColumnVector) {
-      ((BytesColumnVector) outV).initBuffer();
-    }
+    outV.reset();
 
     if (n == 0) {
       //Nothing to do
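Note: dropping the instanceof BytesColumnVector special case works because reset() is virtual. The base ColumnVector.reset() (modified further down in this patch) restores the no-nulls default state, and my understanding is that BytesColumnVector overrides it to also reinitialize its value buffers, which is what the deleted initBuffer() call did by hand. Sketch of the assumed override shape, not the verbatim Hive source:

  // Base class contract after reset(): noNulls == true, isNull[] all false,
  // isRepeating == false.
  //
  // Assumed BytesColumnVector override (verify against the actual class
  // before relying on this):
  @Override
  public void reset() {
    super.reset();   // null bookkeeping back to defaults
    initBuffer(0);   // replaces the old hand-rolled instanceof branch
  }
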
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index a85bfef..f853f73 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -27,6 +27,8 @@
 import java.util.List;
 import java.util.Map;
 
+import com.google.common.base.Preconditions;
+
 import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.orc.BooleanColumnStatistics;
 import org.apache.orc.OrcUtils;
@@ -1062,6 +1064,8 @@ public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOExcept
       }
       result.size = (int) batchSize;
 
+      Preconditions.checkState(result.validateIsNull());
+
       advanceToNextRow(reader, rowInStripe + rowBaseInStripe, true);
       return result;
     } catch (IOException e) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
index 3fe28d8..cc67e6b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
@@ -240,23 +240,16 @@ Object next(Object previous) throws IOException {
      */
     public Object nextVector(Object previousVector, long batchSize) throws IOException {
       ColumnVector result = (ColumnVector) previousVector;
+      result.reset();
       if (present != null) {
         // Set noNulls and isNull vector of the ColumnVector based on
         // present stream
-        result.noNulls = true;
         for (int i = 0; i < batchSize; i++) {
           result.isNull[i] = (present.next() != 1);
           if (result.noNulls && result.isNull[i]) {
             result.noNulls = false;
           }
         }
-      } else {
-        // There is not present stream, this means that all the values are
-        // present.
-        result.noNulls = true;
-        for (int i = 0; i < batchSize; i++) {
-          result.isNull[i] = false;
-        }
       }
       return previousVector;
     }
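Note: the TreeReaderFactory simplification leans on the same reset() contract. After result.reset(), noNulls is already true and isNull[] is already all false, so the deleted else branch (the no-present-stream case) was recreating state that now exists by construction, and the present-stream loop only needs to record the nulls it actually sees. Restated as a sketch, using the same present.next() convention as the hunk:

  // After reset(): noNulls == true, isNull[] all false.
  result.reset();
  if (present != null) {
    for (int i = 0; i < batchSize; i++) {
      if (present.next() != 1) {   // a 0 bit in the PRESENT stream => null row
        result.isNull[i] = true;
        result.noNulls = false;    // flipped at the first real null
      }
    }
  }
  // No else branch needed: the all-values-present case is exactly the
  // post-reset() state.
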
diff --git ql/src/test/results/clientpositive/tez/vector_custom_udf_configure.q.out ql/src/test/results/clientpositive/tez/vector_custom_udf_configure.q.out
new file mode 100644
index 0000000..1113ac7
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_custom_udf_configure.q.out
@@ -0,0 +1,58 @@
+PREHOOK: query: create temporary function UDFHelloTest as 'org.apache.hadoop.hive.ql.exec.vector.UDFHelloTest'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: udfhellotest
+POSTHOOK: query: create temporary function UDFHelloTest as 'org.apache.hadoop.hive.ql.exec.vector.UDFHelloTest'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: udfhellotest
+PREHOOK: query: create table testorc1(id int, name string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testorc1
+POSTHOOK: query: create table testorc1(id int, name string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testorc1
+PREHOOK: query: insert into table testorc1 values(1, 'a1'), (2,'a2')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@testorc1
+POSTHOOK: query: insert into table testorc1 values(1, 'a1'), (2,'a2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@testorc1
+POSTHOOK: Lineage: testorc1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: testorc1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select id, UDFHelloTest(name) from testorc1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select id, UDFHelloTest(name) from testorc1
+POSTHOOK: type: QUERY
+Plan not optimized by CBO.
+
+Stage-0
+   Fetch Operator
+      limit:-1
+      Stage-1
+         Map 1 vectorized
+         File Output Operator [FS_4]
+            compressed:false
+            Statistics:Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+            table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+            Select Operator [OP_3]
+               outputColumnNames:["_col0","_col1"]
+               Statistics:Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+               TableScan [TS_0]
+                  alias:testorc1
+                  Statistics:Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: select id, UDFHelloTest(name) from testorc1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testorc1
+#### A masked pattern was here ####
+POSTHOOK: query: select id, UDFHelloTest(name) from testorc1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testorc1
+#### A masked pattern was here ####
+1	Hello a1
+2	Hello a2
diff --git ql/src/test/results/clientpositive/tez/vectorization_offset_limit.q.out ql/src/test/results/clientpositive/tez/vectorization_offset_limit.q.out
new file mode 100644
index 0000000..b05d941
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vectorization_offset_limit.q.out
@@ -0,0 +1,108 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 2
+      Processor Tree:
+        TableScan
+          alias: alltypesorc
+          Filter Operator
+            predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
+            Select Operator
+              expressions: cbigint (type: bigint), cdouble (type: double)
+              outputColumnNames: _col0, _col1
+              Limit
+                Number of rows: 2
+                Offset of rows: 3
+                ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1887561756	10361.0
+-1887561756	-8881.0
+PREHOOK: query: explain
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ctinyint is not null (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: tinyint), _col1 (type: double)
+                        sort order: ++
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col2 (type: smallint)
+            Execution mode: vectorized
+        Reducer 2
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 3
+                  Offset of rows: 10
+                  Statistics: Num rows: 3 Data size: 645 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 645 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-64	-7196.0	-7196
+-64	-7196.0	-7196
+-64	-7196.0	-7196
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index fcb1ae9..3f44697 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -21,6 +21,8 @@
 import java.io.IOException;
 import java.util.Arrays;
 
+import com.google.common.base.Preconditions;
+
 /**
  * ColumnVector contains the shared structure for the sub-types,
  * including NULL information, and whether this vector
@@ -82,6 +84,19 @@ public ColumnVector(int len) {
     preFlattenIsRepeating = false;
   }
 
+  public int invalidIsNullRow() {
+
+    // Only call this method when noNulls is true.
+    Preconditions.checkState(noNulls);
+
+    for (int i = 0; i < isNull.length; i++) {
+      if (isNull[i]) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
   /**
    * Resets the column to default state
    *  - fills the isNull array with false
@@ -91,6 +106,8 @@ public ColumnVector(int len) {
   public void reset() {
     if (!noNulls) {
       Arrays.fill(isNull, false);
+    } else {
+      Preconditions.checkState(invalidIsNullRow() == -1);
     }
     noNulls = true;
     isRepeating = false;
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
index 380300e..cc14f33 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -24,6 +24,8 @@
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 
+import com.google.common.base.Preconditions;
+
 /**
  * A VectorizedRowBatch is a set of rows, organized with each column
  * as a vector. It is the unit of query execution, organized to minimize
@@ -95,6 +97,34 @@ public VectorizedRowBatch(int numCols, int size) {
     partitionColumnCount = -1;
   }
 
+  public boolean validateIsNull() {
+    int row;
+    for (int i = 0; i < numCols; i++) {
+      ColumnVector colVector = cols[i];
+      if (colVector != null && colVector.noNulls) {
+        row = colVector.invalidIsNullRow();
+        if (row != -1) {
+          Preconditions.checkState(false, "column " + i + " noNulls is true and isNull[" + row + "] array entry is true");
+        }
+      }
+    }
+    return true;
+  }
+
+  public int invalidIsNullCol() {
+    int row;
+    for (int i = 0; i < numCols; i++) {
+      ColumnVector colVector = cols[i];
+      if (colVector != null && colVector.noNulls) {
+        row = colVector.invalidIsNullRow();
+        if (row != -1) {
+          return i;
+        }
+      }
+    }
+    return -1;
+  }
+
   public void setPartitionInfo(int dataColumnCount, int partitionColumnCount) {
     this.dataColumnCount = dataColumnCount;
     this.partitionColumnCount = partitionColumnCount;
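Note: the two storage-api additions compose. validateIsNull() walks every column of the batch and fails loudly via checkState, while invalidIsNullCol() and invalidIsNullRow() narrow a violation down to exact coordinates, so a failure message can name both. Since validateIsNull() returns true on success, callers can also pick their enforcement level; a hedged usage sketch using only the APIs added by this patch:

  // Always-on, as the operators in this patch do:
  Preconditions.checkState(batch.validateIsNull());

  // Cheaper alternative if the O(numCols * isNull.length) walk is too hot
  // for production (runs only under -ea):
  assert batch.validateIsNull();

  // Locating a violation by hand:
  int badCol = batch.invalidIsNullCol();
  if (badCol != -1) {
    int badRow = batch.cols[badCol].invalidIsNullRow();
    // badCol/badRow identify the noNulls/isNull contradiction.
  }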