diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 974bfac..34e7cfa 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -731,18 +731,25 @@ minillaplocal.query.files=\
   vector_groupby_grouping_sets_limit.q,\
   vector_groupby_grouping_window.q,\
   vector_groupby_rollup1.q,\
+  vector_if_expr_2.q,\
   vector_join30.q,\
   vector_join_filters.q,\
   vector_leftsemi_mapjoin.q,\
+  vector_like_2.q,\
   vector_llap_text_1.q,\
   vector_mapjoin_reduce.q,\
   vector_number_compare_projection.q,\
+  vector_order_null.q,\
+  vector_outer_reference_windowed.q,\
   vector_partitioned_date_time.q,\
   vector_ptf_1.q,\
   vector_ptf_part_simple.q,\
   vector_reuse_scratchcols.q,\
+  vector_string_decimal.q,\
   vector_udf_adaptor_1.q,\
+  vector_udf_string_to_boolean.q,\
   vector_udf1.q,\
+  vector_udf2.q,\
   vector_windowing.q,\
   vector_windowing_expressions.q,\
   vector_windowing_gby.q,\
@@ -755,9 +762,12 @@ minillaplocal.query.files=\
   vector_windowing_streaming.q,\
   vector_windowing_windowspec.q,\
   vector_windowing_windowspec4.q,\
+  vectorization_div0.q,\
   vectorization_input_format_excludes.q,\
+  # vectorization_offset_limit.q,\
   vectorized_insert_into_bucketed_table.q,\
   vectorization_short_regress.q,\
+  # vectorized_distinct_gby.q,\
   vectorized_dynamic_partition_pruning.q,\
   vectorized_dynamic_semijoin_reduction.q,\
   vectorized_dynamic_semijoin_reduction2.q,\
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt
index b5011c3..ce931fa 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt
@@ -52,6 +52,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -60,31 +66,23 @@ public class <ClassName> extends VectorExpression {
     <InputColumnVectorType2> inputColVector2 = (<InputColumnVectorType2>) batch.cols[colNum2];
     <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    int n = batch.size;
+
     <OperandType1>[] vector1 = inputColVector1.vector;
     <OperandType2>[] vector2 = inputColVector2.vector;
     <ReturnType>[] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    outputColVector.isRepeating =
-      inputColVector1.isRepeating && inputColVector2.isRepeating
-        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
-        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
-    // Handle nulls first
+
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
     NullUtil.propagateNullsColCol(
       inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
-
+
     /* Disregard nulls for processing. In other words,
-     * the arithmetic operation is performed even if one or 
+     * the arithmetic operation is performed even if one or
      * more inputs are null. This is to improve speed by avoiding
      * conditional checks in the inner loop.
-     */ 
-    if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
+     */
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
       outputVector[0] = vector1[0] <OperatorSymbol> vector2[0];
     } else if (inputColVector1.isRepeating) {
       final <OperandType1> vector1Value = vector1[0];
@@ -122,9 +120,9 @@ public class <ClassName> extends VectorExpression {
         }
       }
     }
-
-    /* For the case when the output can have null values, follow
-     * the convention that the data values must be 1 for long and
+
+    /* For the case when the output can have null values, follow
+     * the convention that the data values must be 1 for long and
      * NaN for double. This is to prevent possible later zero-divide errors
      * in complex arithmetic expressions like col2 / (col1 - 1)
     * in the case when some col1 entries are null.
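The "1 for long, NaN for double" convention in the comment above is enforced by NullUtil after the null-oblivious loops have run. A minimal sketch of that helper, modeled on NullUtil.setNullDataEntriesLong in hive-exec (simplified, not the actual implementation):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public final class NullDataConventionSketch {

      // Overwrite data entries flagged NULL with the harmless value 1, so that a
      // downstream expression such as col2 / (col1 - 1) cannot divide by leftover
      // garbage left behind by the null-oblivious arithmetic loop.
      public static void setNullDataEntriesLong(
          LongColumnVector v, boolean selectedInUse, int[] sel, int n) {
        if (v.noNulls) {
          return;                  // no NULL entries to neutralize
        }
        if (v.isRepeating && v.isNull[0]) {
          v.vector[0] = 1L;        // one entry describes the whole batch
        } else if (selectedInUse) {
          for (int j = 0; j != n; j++) {
            int i = sel[j];
            if (v.isNull[i]) {
              v.vector[i] = 1L;
            }
          }
        } else {
          for (int i = 0; i != n; i++) {
            if (v.isNull[i]) {
              v.vector[i] = 1L;
            }
          }
        }
      }
    }

The double variant writes Double.NaN instead of 1, for the same reason: a NULL entry's data value is never observed by correct readers, but it must not poison later vectorized arithmetic.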
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt
index ae0d348..d7c7fb4 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt
@@ -54,6 +54,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -62,33 +68,13 @@
     DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2];
     DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    int n = batch.size;
+
     HiveDecimalWritable[] vector1 = inputColVector1.vector;
     HiveDecimalWritable[] vector2 = inputColVector2.vector;
 
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    outputColVector.isRepeating =
-      inputColVector1.isRepeating && inputColVector2.isRepeating
-        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
-        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
-    if (inputColVector1.noNulls && inputColVector2.noNulls) {
-
-      /* Initialize output vector NULL values to false. This is necessary
-       * since the decimal operation may produce a NULL result even for
-       * a non-null input vector value, and convert the output vector
-       * to have noNulls = false;
-       */
-      NullUtil.initOutputNullsToFalse(outputColVector,
-          inputColVector1.isRepeating && inputColVector2.isRepeating,
-          batch.selectedInUse, sel, n);
-    }
-
-    // Handle nulls first
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
     NullUtil.propagateNullsColCol(
      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
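NullUtil.propagateNullsColCol is the helper both column-column templates now lean on. A simplified sketch of its contract (it omits one special case the real NullUtil handles: collapsing the output to a single repeating NULL entry when a repeating input is NULL):

    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

    final class PropagateNullsSketch {

      // outputIsNull[i] = in1 NULL at i OR in2 NULL at i, with noNulls and
      // isRepeating folded in so later code can run null-oblivious loops.
      static void propagateNullsColCol(ColumnVector in1, ColumnVector in2,
          ColumnVector out, int[] sel, int n, boolean selectedInUse) {
        out.noNulls = in1.noNulls && in2.noNulls;
        out.isRepeating = in1.isRepeating && in2.isRepeating;
        if (out.noNulls) {
          return;   // nothing to mark; isNull entries are deliberately left stale
        }
        if (selectedInUse) {
          for (int j = 0; j != n; j++) {
            int i = sel[j];
            out.isNull[i] = isNullAt(in1, i) || isNullAt(in2, i);
          }
        } else {
          for (int i = 0; i != n; i++) {
            out.isNull[i] = isNullAt(in1, i) || isNullAt(in2, i);
          }
        }
      }

      private static boolean isNullAt(ColumnVector v, int i) {
        // A repeating vector is fully described by entry 0.
        return !v.noNulls && v.isNull[v.isRepeating ? 0 : i];
      }
    }

Centralizing this removed the hand-rolled isRepeating expression each template used to compute inline, which is exactly the code the hunks above delete.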
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt
index cbec1ab..922d003 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt
@@ -15,9 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.<InputColumnVectorType>;
 import org.apache.hadoop.hive.ql.exec.vector.<OutputColumnVectorType>;
@@ -53,6 +55,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -62,45 +70,82 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    outputColVector.isRepeating = inputColVector.isRepeating;
-    int n = batch.size;
+
     <OperandType1>[] vector = inputColVector.vector;
     <ReturnType>[] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = vector[0] <OperatorSymbol> value;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
       return;
     }
 
-    if (inputColVector.isRepeating) {
-      outputVector[0] = vector[0] <OperatorSymbol> value;
-
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = vector[i] <OperatorSymbol> value;
+    if (inputColVector.noNulls) {
+
+      // Carefully handle NULLs...
+      if (!outputColVector.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputIsNull[i] = false;
+            outputVector[i] = vector[i] <OperatorSymbol> value;
+          }
+        } else {
+          Arrays.fill(outputIsNull, 0, n, false);
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = vector[i] <OperatorSymbol> value;
+          }
         }
       } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = vector[i] <OperatorSymbol> value;
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector[i] <OperatorSymbol> value;
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = vector[i] <OperatorSymbol> value;
+          }
         }
       }
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = vector[i] <OperatorSymbol> value;
           outputIsNull[i] = inputIsNull[i];
+          outputVector[i] = vector[i] <OperatorSymbol> value;
         }
       } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
          outputVector[i] = vector[i] <OperatorSymbol> value;
        }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
      }
    }
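The key new pattern in the scalar templates is the "carefully handle NULLs" branch: when the input has no nulls but the output column was previously used with nulls, only the isNull entries this batch will actually write are cleared, and noNulls is deliberately left false. A concrete long-addition instantiation of that path (illustrative; `<OperatorSymbol>` rendered as `+`, names are hypothetical):

    import java.util.Arrays;

    final class AddLongScalarSketch {

      static void addNoNullInput(long[] vector, long value, long[] outputVector,
          boolean[] outputIsNull, boolean outputHadNulls,
          int[] sel, boolean selectedInUse, int n) {
        if (outputHadNulls) {
          // Clear only the entries we are about to write. Setting noNulls = true
          // without touching every isNull entry would be a correctness bug.
          if (selectedInUse) {
            for (int j = 0; j != n; j++) {
              int i = sel[j];
              outputIsNull[i] = false;
              outputVector[i] = vector[i] + value;
            }
          } else {
            Arrays.fill(outputIsNull, 0, n, false);  // bulk clear of exactly n entries
            for (int i = 0; i != n; i++) {
              outputVector[i] = vector[i] + value;
            }
          }
        } else {
          // Output is already clean: skip the isNull writes entirely.
          if (selectedInUse) {
            for (int j = 0; j != n; j++) {
              int i = sel[j];
              outputVector[i] = vector[i] + value;
            }
          } else {
            for (int i = 0; i != n; i++) {
              outputVector[i] = vector[i] + value;
            }
          }
        }
      }
    }

Splitting the loop on outputHadNulls keeps the common clean-output case free of boolean stores, which is the performance point the template comments are making.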
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt
index d5aef78..59614f1 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt
@@ -15,9 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -55,6 +57,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -64,68 +72,94 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    outputColVector.isRepeating = inputColVector.isRepeating;
-    int n = batch.size;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     HiveDecimalWritable[] vector = inputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        // The following may override a "false" null setting if an error or overflow occurs.
+        DecimalUtil.<Operation>Checked(0, vector[0], value, outputColVector);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
      return;
    }
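The isRepeating fast path above is shared by all of the rewritten scalar templates: a repeating input is fully described by entry 0, so one output entry is written and the batch-sized loop is skipped. A concrete long-addition rendering of the same logic (a sketch, not generator output):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    final class RepeatingFastPathSketch {

      static void addRepeatingInput(long[] vector, boolean[] inputIsNull,
          boolean inputNoNulls, long value, LongColumnVector outputColVector) {
        if (inputNoNulls || !inputIsNull[0]) {
          // Entry 0 carries the value for every row of the batch.
          outputColVector.isNull[0] = false;
          outputColVector.vector[0] = vector[0] + value;
        } else {
          // A repeating NULL input yields a repeating NULL output.
          outputColVector.isNull[0] = true;
          outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
      }
    }

Note the order: isNull[0] is set before the arithmetic so that checked operations (like the decimal ops below) may flip it back to true on overflow.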
-
-    if (inputColVector.noNulls) {
-
-      /* Initialize output vector NULL values to false. This is necessary
-       * since the decimal operation may produce a NULL result even for
-       * a non-null input vector value, and convert the output vector
-       * to have noNulls = false;
-       */
-      NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating,
-          batch.selectedInUse, sel, n);
-    }
 
-    if (inputColVector.isRepeating) {
-      if (!inputColVector.noNulls) {
-        outputIsNull[0] = inputIsNull[0];
-      }
-
-      // The following may override a "false" null setting if an error or overflow occurs.
-      DecimalUtil.<Operation>Checked(0, vector[0], value, outputColVector);
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+
+    if (inputColVector.noNulls) {
+
+      // Carefully handle NULLs...
+      if (!outputColVector.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputIsNull[i] = false;
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          }
+        } else {
+          Arrays.fill(outputIsNull, 0, n, false);
+          for(int i = 0; i != n; i++) {
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          }
         }
       } else {
-        for(int i = 0; i != n; i++) {
-          DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          }
         }
       }
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputIsNull[i] = inputIsNull[i];
-
-          // The following may override a "false" null setting if an error or overflow occurs.
-          DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
       } else {
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
-
-          // The following may override a "false" null setting if an error or overflow occurs.
-          DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
        }
      }
    }
-
-    /*
-     * Null data entries are not set to a special non-zero value because all null math operations
-     * are checked, meaning that a zero-divide always results in a null result.
-     */
  }
 
  @Override
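The repeated "may override a false null setting" comment refers to the checked decimal operations. A simplified analogue, modeled on DecimalUtil.addChecked in hive-exec (the real helper family is DecimalUtil.<Operation>Checked; treat the body as an illustration):

    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

    final class CheckedDecimalSketch {

      // Compute into the output writable, enforce the column's precision/scale,
      // and turn the entry into NULL if the result did not fit. This is why the
      // template sets outputIsNull[i] = false *before* the call: the checked op
      // may flip it back to true.
      static void addChecked(int i, HiveDecimalWritable left, HiveDecimalWritable right,
          DecimalColumnVector outputColVector) {
        HiveDecimalWritable result = outputColVector.vector[i];
        result.set(left);
        result.mutateAdd(right);
        result.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale);
        if (!result.isSet()) {
          outputColVector.noNulls = false;
          outputColVector.isNull[i] = true;
        }
      }
    }

Because every decimal operation is checked this way, the trailing "set null data entries to 1/NaN" step of the long/double templates is unnecessary here, which is why the old closing comment is deleted above.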
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt
index fd31672..14a0993 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt
@@ -52,6 +52,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -60,31 +66,23 @@ public class <ClassName> extends VectorExpression {
     <InputColumnVectorType2> inputColVector2 = (<InputColumnVectorType2>) batch.cols[colNum2];
     <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    int n = batch.size;
+
     <OperandType1>[] vector1 = inputColVector1.vector;
     <OperandType2>[] vector2 = inputColVector2.vector;
     <ReturnType>[] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    outputColVector.isRepeating =
-      inputColVector1.isRepeating && inputColVector2.isRepeating
-        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
-        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
-    // Handle nulls first
+
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
     NullUtil.propagateNullsColCol(
       inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
-
+
     /* Disregard nulls for processing. In other words,
-     * the arithmetic operation is performed even if one or 
+     * the arithmetic operation is performed even if one or
      * more inputs are null. This is to improve speed by avoiding
      * conditional checks in the inner loop.
-     */ 
-    if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
+     */
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
       outputVector[0] = vector1[0] <OperatorSymbol> vector2[0] ? 1 : 0;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
@@ -120,9 +118,9 @@ public class <ClassName> extends VectorExpression {
         }
       }
     }
-
-    /* For the case when the output can have null values, follow
-     * the convention that the data values must be 1 for long and
+
+    /* For the case when the output can have null values, follow
+     * the convention that the data values must be 1 for long and
      * NaN for double. This is to prevent possible later zero-divide errors
     * in complex arithmetic expressions like col2 / (col1 - 1)
     * in the case when some col1 entries are null.
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt
index 51e6994..372f943 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt
@@ -15,10 +15,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.<InputColumnVectorType>;
 import org.apache.hadoop.hive.ql.exec.vector.<OutputColumnVectorType>;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -53,6 +56,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -60,63 +69,92 @@ public class <ClassName> extends VectorExpression {
     <InputColumnVectorType> inputColVector = (<InputColumnVectorType>) batch.cols[colNum];
     <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    boolean[] nullPos = inputColVector.isNull;
-    boolean[] outNulls = outputColVector.isNull;
-    int n = batch.size;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     <OperandType1>[] vector = inputColVector.vector;
     <ReturnType>[] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = vector[0] <OperatorSymbol> value ? 1 : 0;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
       return;
     }
 
-    outputColVector.isRepeating = false;
-    outputColVector.noNulls = inputColVector.noNulls;
     if (inputColVector.noNulls) {
-      if (inputColVector.isRepeating) {
-        //All must be selected otherwise size would be zero
-        //Repeating property will not change.
-        outputVector[0] = vector[0] <OperatorSymbol> value ? 1 : 0;
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = vector[i] <OperatorSymbol> value ? 1 : 0;
+
+      // Carefully handle NULLs...
+      if (!outputColVector.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+
+        if (batch.selectedInUse) {
+          for(int j=0; j != n; j++) {
+            int i = sel[j];
+            outputIsNull[i] = false;
+            outputVector[i] = vector[i] <OperatorSymbol> value ? 1 : 0;
+          }
+        } else {
+          Arrays.fill(outputIsNull, 0, n, false);
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = vector[i] <OperatorSymbol> value ? 1 : 0;
+          }
         }
       } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = vector[i] <OperatorSymbol> value ? 1 : 0;
-        }
-      }
-    } else {
-      if (inputColVector.isRepeating) {
-        //All must be selected otherwise size would be zero
-        //Repeating property will not change.
-        if (!nullPos[0]) {
-          outputVector[0] = vector[0] <OperatorSymbol> value ? 1 : 0;
-          outNulls[0] = false;
+        if (batch.selectedInUse) {
+          for(int j=0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector[i] <OperatorSymbol> value ? 1 : 0;
+          }
         } else {
-          outNulls[0] = true;
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = vector[i] <OperatorSymbol> value ? 1 : 0;
+          }
         }
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
+      }
+    } else /* there are nulls in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       * NOTE: We can't avoid conditional statements for LONG/DOUBLE because of NULL
+       * comparison requirements.
+       */
+
+      if (batch.selectedInUse) {
         for(int j=0; j != n; j++) {
           int i = sel[j];
-          if (!nullPos[i]) {
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
             outputVector[i] = vector[i] <OperatorSymbol> value ? 1 : 0;
-            outNulls[i] = false;
           } else {
-            //comparison with null is null
-            outNulls[i] = true;
+            // Comparison with NULL is NULL.
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
       } else {
-        System.arraycopy(nullPos, 0, outNulls, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!nullPos[i]) {
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
             outputVector[i] = vector[i] <OperatorSymbol> value ? 1 : 0;
-          }
+          } else {
+            // Comparison with NULL is NULL.
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
        }
      }
    }
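The comparison templates encode SQL's three-valued logic: boolean results are stored as 0/1 in a long output column, and comparing against NULL yields NULL, never false. A concrete "col > scalar" rendering of the has-nulls path (a sketch, not generator output):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    final class NullComparisonSketch {

      static void greaterThanScalar(long[] vector, boolean[] inputIsNull, long value,
          LongColumnVector outputColVector, int n) {
        for (int i = 0; i != n; i++) {
          if (!inputIsNull[i]) {
            outputColVector.isNull[i] = false;
            outputColVector.vector[i] = vector[i] > value ? 1 : 0;
          } else {
            // Comparison with NULL is NULL (SQL three-valued logic).
            outputColVector.isNull[i] = true;
            outputColVector.noNulls = false;
          }
        }
      }
    }

This is also why the template comment says the per-row conditional cannot be hoisted out of the loop here, unlike in the arithmetic templates: the NULL semantics require a decision per row.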
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
index 3e95557..4853749 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
@@ -52,6 +52,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -60,22 +66,14 @@ public class <ClassName> extends VectorExpression {
     <InputColumnVectorType2> inputColVector2 = (<InputColumnVectorType2>) batch.cols[colNum2];
     <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    int n = batch.size;
+
     <OperandType1>[] vector1 = inputColVector1.vector;
     <OperandType2>[] vector2 = inputColVector2.vector;
     <ReturnType>[] outputVector = outputColVector.vector;
 
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    outputColVector.isRepeating =
-      inputColVector1.isRepeating && inputColVector2.isRepeating
-        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
-        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
-    // Handle nulls first
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
    NullUtil.propagateNullsColCol(
      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt
index 2be16cc..c3836d3 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt
@@ -54,6 +54,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -61,34 +67,15 @@
     DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1];
     DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2];
     DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+    boolean[] outputIsNull = outputColVector.isNull;
     int[] sel = batch.selected;
-    int n = batch.size;
+
     HiveDecimalWritable[] vector1 = inputColVector1.vector;
     HiveDecimalWritable[] vector2 = inputColVector2.vector;
 
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    outputColVector.isRepeating =
-      inputColVector1.isRepeating && inputColVector2.isRepeating
-        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
-        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
-    if (inputColVector1.noNulls && inputColVector2.noNulls) {
-
-      /* Initialize output vector NULL values to false. This is necessary
-       * since the decimal operation may produce a NULL result even for
-       * a non-null input vector value, and convert the output vector
-       * to have noNulls = false;
-       */
-      NullUtil.initOutputNullsToFalse(outputColVector,
-          inputColVector1.isRepeating && inputColVector2.isRepeating,
-          batch.selectedInUse, sel, n);
-    }
-
-    // Handle nulls first
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
     NullUtil.propagateNullsColCol(
       inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
@@ -103,10 +90,12 @@
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+          // The following may override a "false" null setting if an error or overflow occurs.
           DecimalUtil.<Operation>Checked(i, vector1[0], vector2[i], outputColVector);
         }
       } else {
         for(int i = 0; i != n; i++) {
+          // The following may override a "false" null setting if an error or overflow occurs.
          DecimalUtil.<Operation>Checked(i, vector1[0], vector2[i], outputColVector);
        }
      }
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt
index 159a61e..b9b038e 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.<InputColumnVectorType>;
 import org.apache.hadoop.hive.ql.exec.vector.<OutputColumnVectorType>;
@@ -53,6 +55,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -62,50 +70,88 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    outputColVector.isRepeating = inputColVector.isRepeating;
-    int n = batch.size;
+
     <OperandType1>[] vector = inputColVector.vector;
     <ReturnType>[] outputVector = outputColVector.vector;
 
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     if (value == 0) {
       // Denominator is zero, convert the batch to nulls
       outputColVector.noNulls = false;
       outputColVector.isRepeating = true;
       outputIsNull[0] = true;
+      NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+      return;
     } else if (inputColVector.isRepeating) {
-      outputVector[0] = vector[0] <OperatorSymbol> value;
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = vector[0] <OperatorSymbol> value;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+      return;
+    }
 
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = vector[i] <OperatorSymbol> value;
+    if (inputColVector.noNulls) {
+
+      // Carefully handle NULLs...
+      if (!outputColVector.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputIsNull[i] = false;
+            outputVector[i] = vector[i] <OperatorSymbol> value;
+          }
+        } else {
+          Arrays.fill(outputIsNull, 0, n, false);
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = vector[i] <OperatorSymbol> value;
+          }
         }
       } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = vector[i] <OperatorSymbol> value;
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector[i] <OperatorSymbol> value;
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = vector[i] <OperatorSymbol> value;
+          }
         }
       }
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
+        for(int j=0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = vector[i] <OperatorSymbol> value;
           outputIsNull[i] = inputIsNull[i];
+          outputVector[i] = vector[i] <OperatorSymbol> value;
         }
       } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
          outputVector[i] = vector[i] <OperatorSymbol> value;
        }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
      }
    }
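The zero-divisor branch now short-circuits: one repeating NULL entry describes the whole batch, and the data entries are forced to the 1/NaN convention before returning. An illustrative check of the resulting behavior, assuming the generated LongColDivideLongScalar class and its (colNum, scalar, outputColumnNum) constructor (not a test from this patch):

    import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColDivideLongScalar;

    final class ZeroDivisorDemo {
      static void demo() throws Exception {
        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        LongColumnVector in = new LongColumnVector();
        in.vector[0] = 10; in.vector[1] = 20; in.vector[2] = 30;
        batch.cols[0] = in;
        batch.cols[1] = new DoubleColumnVector();  // long / long yields double in Hive
        batch.size = 3;

        LongColDivideLongScalar expr = new LongColDivideLongScalar(0, 0L, 1);
        expr.evaluate(batch);

        DoubleColumnVector out = (DoubleColumnVector) batch.cols[1];
        // One repeating NULL entry now describes the whole batch.
        assert !out.noNulls && out.isRepeating && out.isNull[0];
      }
    }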
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt
index 2631468..1d7603e 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -55,6 +57,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -64,28 +72,12 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    outputColVector.isRepeating = inputColVector.isRepeating;
-    int n = batch.size;
+
     HiveDecimalWritable[] vector = inputColVector.vector;
     HiveDecimalWritable[] outputVector = outputColVector.vector;
 
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    if (inputColVector.noNulls) {
-
-      /* Initialize output vector NULL values to false. This is necessary
-       * since the decimal operation may produce a NULL result even for
-       * a non-null input vector value, and convert the output vector
-       * to have noNulls = false;
-       */
-      NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating,
-          batch.selectedInUse, sel, n);
-    }
-
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     if (value.compareTo(HiveDecimal.ZERO) == 0) {
@@ -93,45 +85,90 @@ public class <ClassName> extends VectorExpression {
       outputColVector.noNulls = false;
       outputColVector.isRepeating = true;
       outputIsNull[0] = true;
+      return;
     } else if (inputColVector.isRepeating) {
-      DecimalUtil.<Operation>Checked(0, vector[0], value, outputColVector);
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        // The following may override a "false" null setting if an error or overflow occurs.
+        DecimalUtil.<Operation>Checked(0, vector[0], value, outputColVector);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
 
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+
+    if (inputColVector.noNulls) {
+
+      // Carefully handle NULLs...
+      if (!outputColVector.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputIsNull[i] = false;
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          }
+        } else {
+          Arrays.fill(outputIsNull, 0, n, false);
+          for(int i = 0; i != n; i++) {
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          }
         }
       } else {
-        for(int i = 0; i != n; i++) {
-          DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          }
         }
       }
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-
-          // copy isNull entry first because operation may overwrite it
-          outputIsNull[i] = inputIsNull[i];
-          DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
       } else {
-        // copy isNull entries first because operation may overwrite them
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
-          DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            // The following may override a "false" null setting if an error or overflow occurs.
+            DecimalUtil.<Operation>Checked(i, vector[i], value, outputColVector);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
       }
     }
-
-    /*
-     * Null data entries are not set to a special non-zero value because all null math operations
-     * are checked, meaning that a zero-divide always results in a null result.
-     */
  }
 
  @Override
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
index 811f6db..01d1795 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt
@@ -15,10 +15,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -44,6 +47,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       this.evaluateChildren(batch);
     }
@@ -53,49 +62,83 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    int n = batch.size;
+
     <OperandType>[] vector = inputColVector.vector;
     <ReturnType>[] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
-      outputVector[0] = <FuncName>(<OperandCast> vector[0]);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = <FuncName>(<OperandCast> vector[0]);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = <FuncName>(<OperandCast> vector[i]);
+
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+
+      // Carefully handle NULLs...
+      if (!outputColVector.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputIsNull[i] = false;
+            outputVector[i] = <FuncName>(<OperandCast> vector[i]);
+          }
+        } else {
+          Arrays.fill(outputIsNull, 0, n, false);
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = <FuncName>(<OperandCast> vector[i]);
+          }
        }
      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = <FuncName>(<OperandCast> vector[i]);
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = <FuncName>(<OperandCast> vector[i]);
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = <FuncName>(<OperandCast> vector[i]);
+          }
        }
      }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
+        for(int j=0; j != n; j++) {
          int i = sel[j];
-          outputVector[i] = <FuncName>(<OperandCast> vector[i]);
          outputIsNull[i] = inputIsNull[i];
+          outputVector[i] = <FuncName>(<OperandCast> vector[i]);
        }
      } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
        for(int i = 0; i != n; i++) {
          outputVector[i] = <FuncName>(<OperandCast> vector[i]);
        }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
      }
-      outputColVector.isRepeating = false;
    }
  }
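For orientation: these .txt files are not compiled directly; Hive's vectorization code generator substitutes the angle-bracket tokens (`<ClassName>`, `<FuncName>`, `<OperandType>`, ...) at build time to produce one concrete class per type/function combination. A hypothetical expansion of the unary template's no-null inner loops, using FuncSinDoubleToDouble-style substitutions (`<FuncName>` = Math.sin, `<OperandType>` = `<ReturnType>` = double, empty `<OperandCast>`; illustrative, not actual generator output):

    double[] vector = inputColVector.vector;
    double[] outputVector = outputColVector.vector;

    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outputVector[i] = Math.sin(vector[i]);   // <FuncName>(<OperandCast> vector[i])
      }
    } else {
      for (int i = 0; i != n; i++) {
        outputVector[i] = Math.sin(vector[i]);
      }
    }

Reading the templates with one concrete substitution in mind makes the diffs below much easier to follow.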
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt
index f0ab471..2423457 100644
--- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt
@@ -15,10 +15,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -47,6 +50,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       this.evaluateChildren(batch);
     }
@@ -56,49 +65,82 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    int n = batch.size;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     <OperandType>[] vector = inputColVector.vector;
     <ReturnType>[] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
-      outputVector[0] = - vector[0];
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = - vector[0];
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = -vector[i];
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+
+      // Carefully handle NULLs...
+      if (!outputColVector.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
        * the isNull entries.
+         */
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputIsNull[i] = false;
+            outputVector[i] = -vector[i];
+          }
+        } else {
+          Arrays.fill(outputIsNull, 0, n, false);
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = -vector[i];
+          }
        }
      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = -vector[i];
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = -vector[i];
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            outputVector[i] = -vector[i];
+          }
        }
      }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j=0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = -vector[i];
           outputIsNull[i] = inputIsNull[i];
+          outputVector[i] = -vector[i];
         }
       } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
           outputVector[i] = -vector[i];
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
+
     }
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt
index 027e6ed..0d3ee2b 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt
@@ -21,9 +21,11 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import java.sql.Date;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 
 /**
- * Generated from template DTIColumnCompareScalar.txt, which covers comparison 
+ * Generated from template DTIColumnCompareScalar.txt, which covers comparison
  * expressions between a datetime/interval column and a scalar of the same type. The boolean output
  * is stored in a separate boolean column.
  */
@@ -40,6 +42,11 @@ public class <ClassName> extends <BaseClassName> {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
        .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt
index ebc0d8a..be5f641 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt
@@ -15,12 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
-import org.apache.hadoop.hive.ql.udf.UDFToString;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
-import org.apache.hadoop.io.LongWritable;
+import java.sql.Date;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -45,6 +44,11 @@ public class <ClassName> extends <BaseClassName> {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     return (new VectorExpressionDescriptor.Builder())
         .setMode(
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt
index 5db9a0b..bf1128a 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt
@@ -61,6 +61,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -75,22 +81,14 @@ public class <ClassName> extends VectorExpression {
     LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
 
     int[] sel = batch.selected;
-    int n = batch.size;
+
     long[] vector1 = inputColVector1.vector;
     long[] vector2 = inputColVector2.vector;
     long[] outputVector = outputColVector.vector;
 
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    outputColVector.isRepeating =
-      inputColVector1.isRepeating && inputColVector2.isRepeating
-        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
-        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
-    // Handle nulls first
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
    NullUtil.propagateNullsColCol(
      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
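The date/interval templates that follow all share one idiom: DATE values are stored as days-since-epoch longs, so each row is widened to milliseconds, run through Hive's DateTimeMath helper, and narrowed back. A sketch of that round trip, assuming DateTimeMath's add(Date, HiveIntervalYearMonth, Date) overload, which is what the template's dtm.<OperatorMethod> call resolves to for addition (scaffolding names are hypothetical):

    import java.sql.Date;

    import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
    import org.apache.hadoop.hive.ql.util.DateTimeMath;
    import org.apache.hadoop.hive.serde2.io.DateWritable;

    final class DateIntervalSketch {

      private final DateTimeMath dtm = new DateTimeMath();
      private final Date scratchDate = new Date(0);
      private final Date outputDate = new Date(0);

      // One row of DATE + INTERVAL YEAR TO MONTH.
      long addInterval(long epochDays, HiveIntervalYearMonth interval) {
        scratchDate.setTime(DateWritable.daysToMillis((int) epochDays));
        dtm.add(scratchDate, interval, outputDate);
        return DateWritable.dateToDays(outputDate);
      }
    }

The two Date objects are instance fields, mirroring the templates' scratchDate1/outputDate fields: the loop mutates them in place rather than allocating per row.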
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt
index bf4b24c..bb246c0 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
 import java.sql.Date;
+
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -61,6 +63,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -74,59 +82,108 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector1.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector1.noNulls;
-    outputColVector.isRepeating = inputColVector1.isRepeating;
-    int n = batch.size;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     long[] vector1 = inputColVector1.vector;
     long[] outputVector = outputColVector.vector;
 
-    // return immediately if batch is empty
-    if (n == 0) {
+    if (inputColVector1.isRepeating) {
+      if (inputColVector1.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[0]));
+        dtm.<OperatorMethod>(
+            scratchDate1, value, outputDate);
+        outputVector[0] = DateWritable.dateToDays(outputDate);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
       return;
     }
 
-    if (inputColVector1.isRepeating) {
-      scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[0]));
-      dtm.<OperatorMethod>(
-          scratchDate1, value, outputDate);
-      outputVector[0] = DateWritable.dateToDays(outputDate);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector1.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
-          dtm.<OperatorMethod>(
-              scratchDate1, value, outputDate);
-          outputVector[i] = DateWritable.dateToDays(outputDate);
+    if (inputColVector1.noNulls) {
+
+      // Carefully handle NULLs...
+      if (!outputColVector.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
        * the isNull entries.
+         */
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputIsNull[i] = false;
+            scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
+            dtm.<OperatorMethod>(
+                scratchDate1, value, outputDate);
+            outputVector[i] = DateWritable.dateToDays(outputDate);
+          }
+        } else {
+          Arrays.fill(outputIsNull, 0, n, false);
+          for(int i = 0; i != n; i++) {
+            scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
+            dtm.<OperatorMethod>(
+                scratchDate1, value, outputDate);
+            outputVector[i] = DateWritable.dateToDays(outputDate);
+          }
        }
      } else {
-        for(int i = 0; i != n; i++) {
-          scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
-          dtm.<OperatorMethod>(
-              scratchDate1, value, outputDate);
-          outputVector[i] = DateWritable.dateToDays(outputDate);
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
+            dtm.<OperatorMethod>(
+                scratchDate1, value, outputDate);
+            outputVector[i] = DateWritable.dateToDays(outputDate);
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
+            dtm.<OperatorMethod>(
+                scratchDate1, value, outputDate);
+            outputVector[i] = DateWritable.dateToDays(outputDate);
+          }
        }
      }
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
-          dtm.<OperatorMethod>(
-              scratchDate1, value, outputDate);
-          outputVector[i] = DateWritable.dateToDays(outputDate);
-          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
+            dtm.<OperatorMethod>(
+                scratchDate1, value, outputDate);
+            outputVector[i] = DateWritable.dateToDays(outputDate);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
       } else {
         for(int i = 0; i != n; i++) {
-          scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
-          dtm.<OperatorMethod>(
-              scratchDate1, value, outputDate);
-          outputVector[i] = DateWritable.dateToDays(outputDate);
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i]));
+            dtm.<OperatorMethod>(
+                scratchDate1, value, outputDate);
+            outputVector[i] = DateWritable.dateToDays(outputDate);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
        }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
      }
    }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt
index 847ebac..a515319 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
 import java.sql.Timestamp;
 
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
@@ -60,6 +61,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -74,20 +81,12 @@ public class <ClassName> extends VectorExpression {
     <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumnNum];
 
    int[] sel = batch.selected;
-    int n = batch.size;
-
     long[] vector1 = inputColVector1.vector;
 
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    outputColVector.isRepeating =
-      inputColVector1.isRepeating && inputColVector2.isRepeating
-        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
-        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
-    // Handle nulls first
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
     NullUtil.propagateNullsColCol(
       inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt
index 180bebc..d1c2feb 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.util.Arrays;
 import java.sql.Timestamp;
 
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
@@ -61,6 +62,12 @@ public class <ClassName> extends VectorExpression {
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
 
+    // return immediately if batch is empty
+    final int n = batch.size;
+    if (n == 0) {
+      return;
+    }
+
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
@@ -74,58 +81,107 @@ public class <ClassName> extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector1.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector1.noNulls;
-    outputColVector.isRepeating = inputColVector1.isRepeating;
-    int n = batch.size;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     long[] vector1 = inputColVector1.vector;
 
-    // return immediately if batch is empty
-    if (n == 0) {
+    if (inputColVector1.isRepeating) {
+      if (inputColVector1.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0]));
+        dtm.<OperatorMethod>(
+            scratchTimestamp1, value, outputColVector.getScratch<CamelReturnType>());
+        outputColVector.setFromScratch<CamelReturnType>(0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
       return;
     }
 
-    if (inputColVector1.isRepeating) {
-      scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0]));
-      dtm.<OperatorMethod>(
-          scratchTimestamp1, value, outputColVector.getScratch<CamelReturnType>());
-      outputColVector.setFromScratch<CamelReturnType>(0);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector1.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i]));
-          dtm.<OperatorMethod>(
-              scratchTimestamp1, value, outputColVector.getScratch<CamelReturnType>());
-          outputColVector.setFromScratch<CamelReturnType>(i);
+    if (inputColVector1.noNulls) {
+
+      // Carefully handle NULLs...
+      if (!outputColVector.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
        * the isNull entries.
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt index 4f12315..680f2f8 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import java.sql.Date; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -75,6 +77,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = 
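The "Carefully handle NULLs" branch above initializes output isNull entries only when outputColVector.noNulls is false, meaning the reused scratch column may still hold stale nulls, and it picks the cheapest clearing strategy for each case. Continuing the sketch class, for a long column:

import java.util.Arrays;  // additional import for the bulk fill

  static void evaluateNoNullsInput(VectorizedRowBatch batch,
      LongColumnVector in, LongColumnVector out) {
    final int n = batch.size;
    final int[] sel = batch.selected;
    if (!out.noNulls) {
      // Stale nulls possible: clear exactly the entries we will write.
      // NOTE: out.noNulls is left alone since not all isNull entries are cleared.
      if (batch.selectedInUse) {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          out.isNull[i] = false;
          out.vector[i] = op(in.vector[i]);
        }
      } else {
        Arrays.fill(out.isNull, 0, n, false);  // contiguous rows: one bulk fill
        for (int i = 0; i != n; i++) {
          out.vector[i] = op(in.vector[i]);
        }
      }
    } else {
      // isNull is already all-false; skip the flag writes entirely.
      if (batch.selectedInUse) {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          out.vector[i] = op(in.vector[i]);
        }
      } else {
        for (int i = 0; i != n; i++) {
          out.vector[i] = op(in.vector[i]);
        }
      }
    }
  }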
batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,59 +96,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth2.set((int) vector2[0]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector2.isRepeating) { - scratchIntervalYearMonth2.set((int) vector2[0]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
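DateScalarArithmeticIntervalYearMonthColumn gets the same restructuring: a repeating input becomes a dedicated early-return path instead of flag copying. Its shape, continuing the sketch class:

  static void evaluateRepeatingInput(LongColumnVector in, LongColumnVector out) {
    // The caller has already returned on an empty batch, so entry 0 is valid.
    out.isRepeating = false;  // no full column reset; entries are set carefully below
    if (in.isRepeating) {
      if (in.noNulls || !in.isNull[0]) {
        out.isNull[0] = false;
        out.vector[0] = op(in.vector[0]);
      } else {
        out.isNull[0] = true;
        out.noNulls = false;
      }
      out.isRepeating = true;
      return;
    }
    // ... non-repeating branches follow, as in the diff ...
  }

Note the old code copied outputIsNull[0] = inputIsNull[0] unconditionally and even computed a result for a NULL entry 0; the new path computes only when entry 0 is actually non-null.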
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt index a6fa2ac..79473cc 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -75,6 +76,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,52 +95,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + value, inputColVector2.asScratch(0), outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratch(0), outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
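One extra step in DateScalarArithmeticTimestampColumn's repeating path: it also calls NullUtil.setNullOutputEntriesColScalar before returning. As I understand that existing helper, it rewrites the data slots of NULL rows to safe defaults (1 for long vectors, NaN for double vectors), so later arithmetic over the batch, for example a divide in a compound expression, never reads garbage:

    // Repeating result computed above (possibly NULL); then, before returning:
    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
    // NULL rows now hold well-defined data values, keeping expressions like
    // col2 / (col1 - 1) safe when some col1 rows are NULL.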
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt index 30b03ba..5b643bb 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,39 +68,18 @@ public class extends VectorExpression { Decimal64ColumnVector inputColVector2 = (Decimal64ColumnVector) batch.cols[colNum2]; Decimal64ColumnVector outputColVector = (Decimal64ColumnVector) batch.cols[outputColumnNum]; int[] sel = 
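A pattern applied uniformly throughout this patch: the empty-batch check is hoisted above evaluateChildren, so child expressions are no longer invoked at all when n == 0. The method prologue every template now shares:

  @Override
  public void evaluate(VectorizedRowBatch batch) {

    // return immediately if batch is empty; doing this before evaluating
    // children means an empty batch never reaches the child expressions
    final int n = batch.size;
    if (n == 0) {
      return;
    }

    if (childExpressions != null) {
      super.evaluateChildren(batch);
    }
    // ... per-template body ...
  }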
batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; boolean[] outputIsNull = outputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - if (inputColVector1.noNulls && inputColVector2.noNulls) { - - /* - * Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, - inputColVector1.isRepeating && inputColVector2.isRepeating, - batch.selectedInUse, sel, n); - } - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt index 81dcf33..866cd51 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,95 +72,133 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - if (inputColVector.noNulls) { - - /* - * Initialize output vector NULL values to false. 
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - // The following may override a "false" null setting if an error or overflow occurs. - final long result = vector[0] value; - outputVector[0] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[0] value; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } else { + outputVector[0] = result; + } + } else { outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } } } } else { - for(int i = 0; i != n; i++) { - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } + } + } else { + for(int i = 0; i != n; i++) { + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. 
- final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } } - - // Currently, we defer division, etc to regular HiveDecimal so we don't do any null - // default value setting here. } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt index dc6ccb9..c768e89 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -56,6 +58,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,95 +73,131 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. 
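In the Decimal64 templates the overflow check doubles as null production: a result whose magnitude exceeds HiveDecimalWritable.getDecimal64AbsMax(precision) marks the row NULL and clears noNulls. One row of the non-repeating path, with '+' standing in for the stripped operator token:

import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

  static void oneDecimal64Row(Decimal64ColumnVector out, long[] vector, long value, int i) {
    final long outputDecimal64AbsMax =
        HiveDecimalWritable.getDecimal64AbsMax(out.precision);
    final long result = vector[i] + value;  // '+' stands in for the template operator
    out.vector[i] = result;
    if (Math.abs(result) > outputDecimal64AbsMax) {
      out.isNull[i] = true;  // overflow: the row becomes NULL...
      out.noNulls = false;   // ...and the column-level flag records it
    }
  }

The reworked repeating path is slightly stricter: it stores the result only when it fits, writing nothing to outputVector[0] on overflow.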
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[0]; - outputVector[0] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = value vector[0]; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } else { + outputVector[0] = result; + } + } else { outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } else { - for(int i = 0; i != n; i++) { - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + for(int i = 0; i != n; i++) { + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. 
- final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } } - - // Currently, we defer division, etc to regular HiveDecimal so we don't do any null - // default value setting here. } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt index 1ab5228..a7622ea 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -27,7 +30,6 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; -import java.util.Arrays; public class extends VectorExpression { private static final long serialVersionUID = 1L; @@ -49,6 +51,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -58,54 +66,93 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; - if (inputColVector.isRepeating) { + HiveDecimalWritable[] vector = inputColVector.vector; - // All must be selected otherwise size would be zero - // Repeating property will not change. - outputIsNull[0] = inputIsNull[0]; - (0, vector[0], outputColVector); + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + (0, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; + return; + } - // Set isNull because decimal operation can yield a null. - outputIsNull[i] = false; - (i, vector[i], outputColVector); + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } } } else { - - // Set isNull because decimal operation can yield a null. - Arrays.fill(outputIsNull, 0, n, false); - for(int i = 0; i != n; i++) { - (i, vector[i], outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - (i, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - (i, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
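DecimalColumnUnaryFunc's call sites, rendered here as bare '(i, vector[i], outputColVector)', expand to DecimalUtil helpers that write the output slot and may themselves null the row on error or overflow; that is why every path first sets outputIsNull[i] = false and lets the call override it. A sketch assuming DecimalUtil.floor as the expanded function (an assumption; any unary DecimalUtil op has this shape):

import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

  static void oneDecimalRow(DecimalColumnVector out, HiveDecimalWritable[] vector, int i) {
    out.isNull[i] = false;                 // optimistic: assume a result is produced
    DecimalUtil.floor(i, vector[i], out);  // may set isNull[i] and noNulls on overflow
  }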
+ (i, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } - outputColVector.isRepeating = false; } } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt index 36ad892..55eb50e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt @@ -57,28 +57,27 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. + if ((vector[0] < leftValue || vector[0] > rightValue)) { - + // Entire batch is filtered out. batch.size = 0; } @@ -105,12 +104,9 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((vector[0] < leftValue || vector[0] > rightValue)) { - + // Entire batch is filtered out. batch.size = 0; } @@ -121,7 +117,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((leftValue <= vector[i] && vector[i] <= rightValue)) { sel[newSize++] = i; } @@ -132,7 +128,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((leftValue <= vector[i] && vector[i] <= rightValue)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt index 150d341..e458992 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + // filter rows with NULL on left input int newSize; newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n); @@ -85,12 +86,9 @@ public class extends VectorExpression { n = batch.size = newSize; batch.selectedInUse = true; } - + // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && 
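The Filter* templates never write an output column; they narrow the batch in place by rewriting batch.selected and batch.size. The noNulls case of a column-versus-scalar compare, sketched with '>' for the stripped operator:

  static void filterGreaterThanScalar(VectorizedRowBatch batch,
      LongColumnVector in, long value) {
    final int n = batch.size;
    if (n == 0) {
      return;
    }
    final int[] sel = batch.selected;
    final long[] vector = in.vector;
    if (in.isRepeating) {
      if (!(vector[0] > value)) {
        batch.size = 0;  // entire batch is filtered out
      }
      return;
    }
    int newSize = 0;
    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        final int i = sel[j];
        if (vector[i] > value) {
          sel[newSize++] = i;  // compact surviving row indices in place
        }
      }
      batch.size = newSize;
    } else {
      for (int i = 0; i != n; i++) {
        if (vector[i] > value) {
          sel[newSize++] = i;
        }
      }
      batch.size = newSize;
      batch.selectedInUse = true;  // sel[] is authoritative from now on
    }
  }

The deleted "All must be selected otherwise size would be zero" comments leaned on n > 0, which the hoisted empty-batch guard now establishes explicitly, so dropping them reads as intentional.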
inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. if (!(vector1[0] vector2[0])) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt index a9ddeca..c955c06 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt @@ -51,25 +51,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(vector[0] value)) { //Entire batch is filtered out. batch.size = 0; @@ -97,9 +96,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(vector[0] value)) { //Entire batch is filtered out. batch.size = 0; @@ -111,7 +108,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (vector[i] value) { sel[newSize++] = i; } @@ -122,7 +119,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (vector[i] value) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt index 7c41f3e..f42668c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt @@ -60,26 +60,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. if ((DecimalUtil.compare(vector[0], leftValue) < 0 || DecimalUtil.compare(vector[0], rightValue) > 0)) { // Entire batch is filtered out. 
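For column-column filters the templates sidestep per-row null logic: NullUtil.filterNulls, an existing helper, drops NULL rows from the selection up front, once per input, and the remaining compare can assume non-null data. The call pattern (the right-input call sits in context this hunk skips; shown here for completeness, following the template's shape):

    int newSize;
    // Drop rows where the left input is NULL.
    newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n);
    if (newSize < n) {
      n = batch.size = newSize;
      batch.selectedInUse = true;
    }
    // Then rows where the right input is NULL.
    newSize = NullUtil.filterNulls(batch.cols[colNum2], batch.selectedInUse, sel, n);
    if (newSize < n) {
      n = batch.size = newSize;
      batch.selectedInUse = true;
    }
    // All surviving rows are non-null on both sides; run the plain compare.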
@@ -108,10 +106,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((DecimalUtil.compare(vector[0], leftValue) < 0 || DecimalUtil.compare(vector[0], rightValue) > 0)) { // Entire batch is filtered out. @@ -124,19 +119,19 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((DecimalUtil.compare(leftValue, vector[i]) <= 0 && DecimalUtil.compare(vector[i], rightValue) <= 0)) { sel[newSize++] = i; } } } - + // Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((DecimalUtil.compare(leftValue, vector[i]) <= 0 && DecimalUtil.compare(vector[i], rightValue) <= 0)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt index 6a82183..77fe7ae 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt index 80a19d9..078b132 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt @@ -53,24 +53,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. 
if (!(DecimalUtil.compare(vector[0], value) 0)) { // Entire batch is filtered out. @@ -99,9 +99,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(DecimalUtil.compare(vector[0], value) 0)) { // Entire batch is filtered out. @@ -114,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(vector[i], value) 0) { sel[newSize++] = i; } @@ -126,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(vector[i], value) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt index 4b7e849..20dbaba 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt @@ -53,24 +53,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(DecimalUtil.compare(value, vector[0]) 0)) { // Entire batch is filtered out. @@ -99,9 +99,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(DecimalUtil.compare(value, vector[0]) 0)) { // Entire batch is filtered out. 
@@ -114,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(value, vector[i]) 0) { sel[newSize++] = i; } @@ -126,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(value, vector[i]) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt index f741409..4afed54 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt @@ -56,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,13 +71,8 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - [] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + [] vector1 = inputColVector1.vector; // filter rows with NULL on left input int newSize; @@ -90,9 +91,6 @@ public class extends VectorExpression { // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. if (!(vector1[0] inputColVector2.(0))) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt index 8ece14f..8f8104d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt @@ -57,24 +57,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(value inputColVector.(0))) { //Entire batch is filtered out. batch.size = 0; @@ -102,9 +100,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(value inputColVector.(0))) { //Entire batch is filtered out. 
batch.size = 0; @@ -116,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value inputColVector.(i)) { sel[newSize++] = i; } @@ -127,7 +123,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value inputColVector.(i)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt index 18840f1..28b5704 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt @@ -52,25 +52,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(value vector[0])) { //Entire batch is filtered out. batch.size = 0; @@ -98,9 +97,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(value vector[0])) { //Entire batch is filtered out. batch.size = 0; @@ -112,7 +109,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value vector[i]) { sel[newSize++] = i; } @@ -123,7 +120,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value vector[i]) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt index b9a332a..b7f70e1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt @@ -56,27 +56,26 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. 
Repeating property will not change. if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -108,9 +107,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -124,20 +121,20 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt index 0f0cb2e..2d18d1d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,36 +67,31 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + byte[][] vector1 = inputColVector1.vector; byte[][] vector2 = inputColVector2.vector; int[] start1 = inputColVector1.start; int[] start2 = inputColVector2.start; int[] length1 = inputColVector1.length; int[] length2 = inputColVector2.length; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - + /* Either all must remain selected or all will be eliminated. * Repeating property will not change. 
*/ - if (!((vector1[0], start1[0], length1[0], + if (!((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]))) { batch.size = 0; - } + } } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -99,7 +100,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -114,7 +115,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -123,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -137,7 +138,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -146,7 +147,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -173,7 +174,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -184,7 +185,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -206,7 +207,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -215,7 +216,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -231,7 +232,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -242,7 +243,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -254,16 +255,16 @@ public class extends VectorExpression { } } } - + // handle case where only input 1 has nulls } else if (inputColVector2.noNulls) { if (inputColVector1.isRepeating && 
inputColVector2.isRepeating) { if (nullPos1[0] || - !((vector1[0], start1[0], length1[0], + !((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]))) { - batch.size = 0; + batch.size = 0; return; - } + } } else if (inputColVector1.isRepeating) { if (nullPos1[0]) { @@ -275,7 +276,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -284,7 +285,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -300,7 +301,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -311,7 +312,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -328,7 +329,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -339,7 +340,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -349,17 +350,17 @@ public class extends VectorExpression { batch.size = newSize; batch.selectedInUse = true; } - } + } } - + // handle case where both inputs have nulls } else { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { if (nullPos1[0] || nullPos2[0] || - !((vector1[0], start1[0], length1[0], + !((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]))) { - batch.size = 0; - } + batch.size = 0; + } } else if (inputColVector1.isRepeating) { if (nullPos1[0]) { batch.size = 0; @@ -370,7 +371,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -381,7 +382,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -402,7 +403,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -413,7 +414,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -430,7 +431,7 @@ public class extends VectorExpression { 
for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i] && !nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -441,7 +442,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i] && !nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt index a85a889..76ec8a0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt @@ -52,27 +52,26 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!((vector[0], start[0], length[0], value, 0, value.length))) { //Entire batch is filtered out. @@ -101,9 +100,7 @@ public abstract class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!((vector[0], start[0], length[0], value, 0, value.length))) { //Entire batch is filtered out. 
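(Illustration; not part of the patch.) Across every template touched here, the patch hoists the empty-batch check to the very top of evaluate(), before child expressions run, instead of after the column vectors have been unpacked. A minimal self-contained sketch of that control flow, using a simplified stand-in batch type rather than Hive's real VectorizedRowBatch:

public class EarlyReturnSketch {

  // Simplified stand-in for VectorizedRowBatch; only the size field matters here.
  static final class MiniBatch {
    int size;
  }

  private EarlyReturnSketch[] childExpressions;

  public void evaluate(MiniBatch batch) {
    // Return immediately if the batch is empty. Doing this before child
    // evaluation (rather than after unpacking column vectors, as the old
    // code did) also spares the entire child-expression subtree.
    final int n = batch.size;
    if (n == 0) {
      return;
    }
    if (childExpressions != null) {
      for (EarlyReturnSketch child : childExpressions) {
        child.evaluate(batch);
      }
    }
    // ... per-row work over n entries would go here ...
  }
}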
@@ -116,19 +113,19 @@ public abstract class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt index f3d1e58..91d8da5c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt @@ -53,27 +53,26 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!((value, 0, value.length, vector[0], start[0], length[0]))) { //Entire batch is filtered out. @@ -102,9 +101,7 @@ public abstract class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!((value, 0, value.length, vector[0], start[0], length[0]))) { //Entire batch is filtered out. 
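(Illustration; not part of the patch.) The loops updated in the hunk that follows all share one compaction idiom: walk the candidate rows (through batch.selected when selectedInUse is set), keep only non-null rows whose comparison passes, and compact the survivors back into the selected array. A self-contained sketch of that idiom over plain arrays; the long-valued predicate is illustrative, where the real templates substitute a generated string comparison:

final class FilterSketch {

  // Returns the new batch size; the caller stores it in batch.size and,
  // on the previously-unselected path, also sets batch.selectedInUse = true.
  static int filterNonNull(boolean selectedInUse, int[] sel, int n,
      boolean[] inputIsNull, long[] vector, long value) {
    int newSize = 0;
    if (selectedInUse) {
      // Only the currently selected rows are candidates.
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        if (!inputIsNull[i] && vector[i] > value) {
          sel[newSize++] = i;   // compact survivors in place; newSize never passes j
        }
      }
    } else {
      // No selection yet: scan rows 0..n-1 and build one.
      for (int i = 0; i != n; i++) {
        if (!inputIsNull[i] && vector[i] > value) {
          sel[newSize++] = i;
        }
      }
    }
    return newSize;
  }
}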
@@ -117,19 +114,19 @@ public abstract class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt index 53bf271..604060a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt @@ -59,25 +59,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { // Entire batch is filtered out. @@ -106,10 +103,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { // Entire batch is filtered out. 
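(Illustration; not part of the patch.) On the repeating-column path handled just above, every row carries the value at entry 0, so a BETWEEN filter either keeps the whole batch or empties it and the selected vector never needs rewriting. A self-contained sketch of that fast path over primitive longs; the real template compares through TimestampColumnVector.compareTo instead:

final class BetweenRepeatingSketch {

  // Repeating-column fast path for a BETWEEN filter.
  // Returns the surviving batch size.
  static int filterBetweenRepeating(int n, long[] vector, boolean noNulls,
      boolean[] inputIsNull, long left, long right) {
    if (!noNulls && inputIsNull[0]) {
      return 0;  // a repeating NULL fails BETWEEN for every row
    }
    if (vector[0] < left || vector[0] > right) {
      return 0;  // entire batch is filtered out
    }
    return n;    // all rows pass; repeating property is unchanged
  }
}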
@@ -122,7 +116,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { sel[newSize++] = i; } @@ -134,7 +128,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt index eaa58c7..f9bc9ee 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,13 +68,8 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - [] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + [] vector2 = inputColVector2.vector; // filter rows with NULL on left input int newSize; @@ -87,9 +88,6 @@ public class extends VectorExpression { // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. if (!(inputColVector1.(0) vector2[0])) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt index 2e38269..fc1be95 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt @@ -53,24 +53,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(inputColVector.(0) value)) { //Entire batch is filtered out. batch.size = 0; @@ -98,9 +96,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
- if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector.(0) value)) { //Entire batch is filtered out. batch.size = 0; @@ -112,7 +108,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector.(i) value) { sel[newSize++] = i; } @@ -123,7 +119,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector.(i) value) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt index 697e3ef..0a541f9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt @@ -57,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -70,12 +76,6 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt index 435316d..68e0006 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt @@ -56,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,18 +70,10 @@ public class extends VectorExpression { inputColVector1 = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector1.isNull; if (inputColVector1.noNulls) { if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(inputColVector1.compareTo(0, value) 0)) { //Entire batch is filtered out. batch.size = 0; @@ -103,9 +101,7 @@ public class extends VectorExpression { } } else { if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector1.compareTo(0, value) 0)) { //Entire batch is filtered out. 
batch.size = 0; @@ -117,7 +113,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector1.compareTo(i, value) 0) { sel[newSize++] = i; } @@ -128,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector1.compareTo(i, value) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt index 4887ad2..d5952de 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt @@ -56,6 +56,13 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -63,18 +70,10 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector2.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector2.isNull; if (inputColVector2.noNulls) { if (inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(inputColVector2.compareTo(value, 0) 0)) { // Entire batch is filtered out. @@ -103,9 +102,7 @@ public class extends VectorExpression { } } else { if (inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector2.compareTo(value, 0) 0)) { // Entire batch is filtered out. 
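(Illustration; not part of the patch.) The projection templates later in this patch (the IfExpr* and IntervalYearMonth* files) replace blanket flag copies such as outputColVector.noNulls = inputColVector.noNulls with per-row maintenance: clear outputIsNull[i] on every non-null write and flip noNulls off only when an actual NULL is produced. A self-contained sketch of that discipline over plain arrays; the addition stands in for whatever operation a template generates, and clearing all n isNull entries up front is a simplification of what the real code does:

import java.util.Arrays;

final class NullMaintenanceSketch {

  // Returns the value the caller should store in outputColVector.noNulls.
  static boolean addScalar(int n, long[] in, boolean inNoNulls,
      boolean[] inIsNull, long scalar, long[] out, boolean[] outIsNull) {
    boolean outNoNulls = true;
    if (inNoNulls) {
      // Output entries may hold stale NULL marks from an earlier batch, so
      // initialize isNull to false for the rows being written. (The templates
      // clear only the written rows and therefore leave the existing noNulls
      // flag alone on this path; filling all n entries simplifies the sketch.)
      Arrays.fill(outIsNull, 0, n, false);
      for (int i = 0; i != n; i++) {
        out[i] = in[i] + scalar;
      }
    } else {
      for (int i = 0; i != n; i++) {
        if (!inIsNull[i]) {
          outIsNull[i] = false;
          out[i] = in[i] + scalar;
        } else {
          outIsNull[i] = true;   // propagate the NULL
          outNoNulls = false;    // the output now contains NULLs
        }
      }
    }
    return outNoNulls;
  }
}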
@@ -118,7 +115,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector2.compareTo(value, i) 0) { sel[newSize++] = i; } @@ -130,7 +127,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector2.compareTo(value, i) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt index cc86a3e..44e8e18 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt @@ -58,27 +58,26 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -110,9 +109,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. 
- if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -126,20 +123,20 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt index 52f1d9e..5bab540 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -68,18 +74,14 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] vector2 = arg2ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { if (vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -93,22 +95,26 @@ public class extends VectorExpression { // reduce the number of code paths needed below. arg2ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : false); + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); } } else { for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : false); + outputIsNull[i] = (vector1[i] == 1 ? 
arg2ColVector.isNull[i] : false); } } } else /* there are nulls */ { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt index 1693e8f..df41b5f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -68,18 +74,14 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] vector3 = arg3ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { if (vector1[0] == 1) { outputColVector.fill(arg2Scalar); @@ -95,18 +97,25 @@ public class extends VectorExpression { // for when arg3ColVector is repeating or has no nulls. arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); } } else { for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? 
false : arg3ColVector.isNull[i]); } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt index ebdfe47..cd532f3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -59,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,48 +74,101 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { outputColVector.fill(arg3Scalar); } - } else if (arg1ColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + return; + } + + if (arg1ColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } } } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
- arg2Scalar : arg3Scalar); - outputIsNull[i] = false; + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * Since we always generate a result without NULLs, we can optimize this case similar to + * the optimization above... + */ + + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? - arg2Scalar : arg3Scalar); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } } - Arrays.fill(outputIsNull, 0, n, false); } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt index 9767973..7c5b614 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt @@ -61,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -75,22 +81,14 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt index ca5829c..93eb5eb 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -61,6 +63,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,59 +82,109 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth1.set((int) vector1[0]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchIntervalYearMonth1.set((int) vector1[0]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt index d6e45ac..cfe44c1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,21 +79,12 @@ public class extends VectorExpression { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - 
outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt index 6e232e7..d005b8b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -60,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,59 +80,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth1.set((int) vector1[0]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchIntervalYearMonth1.set((int) vector1[0]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt index 041a651..ec9ea01 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -75,6 +77,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch 
is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,59 +96,109 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.( + value, scratchDate2, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.( - value, scratchDate2, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt index f2d4eaf..81b1406 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -72,6 +73,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -85,52 +92,101 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(0), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratchTimestamp(0), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + outputIsNull[i] = inputIsNull[i]; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + outputIsNull[i] = inputIsNull[i]; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt index bd2cbac..e3f36b9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,21 +68,13 @@ public class extends VectorExpression { TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt index 889c445..10c3601 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt @@ -18,9 +18,11 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -54,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,62 +69,92 @@ public class extends VectorExpression { inputColVector1 = () batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + [] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector1[0] value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector1.noNulls; if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector1[0] value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector1[i] value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = vector1[i] value ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector1[i] value ? 1 : 0; - } - } - } else { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
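Each rewritten compare template now peels off the repeating-input case up front and returns early, since a repeating column contributes a single value (or a single NULL) to every row. A sketch of that shape, using '<' where the generated code substitutes the template's comparison operator:

    public class RepeatingFastPathSketch {
      // Single-entry result for a repeating input; the caller then sets
      // isRepeating = true on the output and returns early.
      static void compareRepeatingWithScalar(long[] vector, boolean[] inputIsNull, boolean noNulls,
                                             long scalar, long[] out, boolean[] outIsNull,
                                             boolean[] outNoNulls /* one-element flag */) {
        if (noNulls || !inputIsNull[0]) {
          outIsNull[0] = false;
          out[0] = vector[0] < scalar ? 1 : 0; // '<' stands in for the template operator
        } else {
          outIsNull[0] = true;
          outNoNulls[0] = false;               // the single repeated row is NULL
        }
      }
    }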
- if (!nullPos[0]) { - outputVector[0] = vector1[0] value ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector1[i] value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = vector1[i] value ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + * NOTE: We can't avoid conditional statements for LONG/DOUBLE because of NULL + * comparison requirements. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector1[i] value ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector1[i] value ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt index 4d79283..f692788 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -54,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,61 +70,88 @@ public class extends VectorExpression { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; - long[] outputVector = outputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + long[] outputVector = outputColVector.vector; + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = value inputColVector.(0) ? 
1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value inputColVector.(0) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value inputColVector.(i) ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value inputColVector.(i) ? 1 : 0; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value inputColVector.(0) ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value inputColVector.(i) ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value inputColVector.(i) ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt index e95baa6..cc8b8a7 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -65,6 +67,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,45 +82,82 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } if (inputColVector.isRepeating) { - outputVector[0] = value vector[0]; - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value vector[i]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = value vector[0]; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value vector[i]; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i]; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value vector[i]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value vector[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i]; + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
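For primitive long/double arithmetic the nulls branch deliberately computes every row, null or not, and copies the isNull flags wholesale: wasted arithmetic on null rows is cheaper than a branch in the inner loop, and the data values of null rows are never read (or are normalized afterwards). Roughly, with '+' standing in for the generated operator:

    import java.util.Arrays;

    public class BranchFreeNullsSketch {
      // Copy the null flags wholesale and compute every row unconditionally.
      static void scalarPlusColumn(long scalar, long[] vector, boolean[] inputIsNull,
                                   long[] out, boolean[] outIsNull, int n) {
        System.arraycopy(inputIsNull, 0, outIsNull, 0, n);
        for (int i = 0; i != n; i++) {
          out[i] = scalar + vector[i];         // no per-row branch
        }
      }

      public static void main(String[] args) {
        long[] out = new long[3];
        boolean[] outIsNull = new boolean[3];
        scalarPlusColumn(10, new long[] {1, 2, 3}, new boolean[] {false, true, false},
            out, outIsNull, 3);
        System.out.println(Arrays.toString(out) + " " + Arrays.toString(outIsNull));
      }
    }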
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = value vector[i]; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = value vector[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = value vector[i]; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt index 3ffca6c..05ebc60 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,60 +72,93 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, value, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullDataEntriesDecimal(outputColVector, batch.selectedInUse, sel, n); return; } - + if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(0, value, vector[0], outputColVector); - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
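The recurring comment about overriding a "false" null setting reflects the checked-decimal contract: the operation itself may null out an entry on overflow or error even when the input row was non-null, which is why noNulls cannot be assumed to stay true through the loop. A sketch of that contract with a hypothetical checkedAdd (the generated DecimalUtil methods are per-operation; this is not their real signature):

    import java.math.BigDecimal;
    import java.math.MathContext;

    public class CheckedDecimalSketch {
      // Stand-in for HiveDecimal's 38-digit bound.
      static final BigDecimal MAX = new BigDecimal("9".repeat(38));

      // Hypothetical checked add: writes the result, or nulls the entry when
      // the result does not fit, overriding the false the caller just stored.
      static void checkedAdd(int i, BigDecimal scalar, BigDecimal v,
                             BigDecimal[] out, boolean[] outIsNull, boolean[] outNoNulls) {
        BigDecimal r = scalar.add(v, MathContext.DECIMAL128);
        if (r.abs().compareTo(MAX) > 0) {
          outIsNull[i] = true;
          outNoNulls[0] = false;
        } else {
          out[i] = r;
        }
      }
    }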
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt index 9f4ec50..b2c1909 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -53,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,63 +69,90 @@ public class extends VectorExpression { inputColVector = () batch.cols[colNum]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = value vector[0] ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i] ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value vector[i] ? 1 : 0; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value vector[0] ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i] ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
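The "Comparison with NULL is NULL" comments encode SQL three-valued logic: unlike the arithmetic templates, a compare cannot simply compute a throwaway 0/1 for a null row, because downstream filters would read it; the row must be marked null instead. In miniature:

    public class NullComparisonSketch {
      // (scalar < NULL) is NULL under SQL three-valued logic, so the row is
      // marked null rather than given a 0/1 value.
      static void scalarLessThanColumn(long scalar, long[] vector, boolean[] inputIsNull,
                                       long[] out, boolean[] outIsNull,
                                       boolean[] outNoNulls, int n) {
        for (int i = 0; i != n; i++) {
          if (!inputIsNull[i]) {
            outIsNull[i] = false;
            out[i] = scalar < vector[i] ? 1 : 0;
          } else {
            outIsNull[i] = true;
            outNoNulls[0] = false;
          }
        }
      }
    }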
+ */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value vector[i] ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = value vector[i] ? 1 : 0; - } + outputIsNull[i] = false; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt index aa33354..a2020a6 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -65,6 +67,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,56 +82,88 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - boolean hasDivBy0 = false; if (inputColVector.isRepeating) { - denom = vector[0]; - outputVector[0] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + denom = vector[0]; + outputVector[0] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - denom = vector[i]; - outputVector[i] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
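The NOTE about leaving outputColVector.noNulls alone rests on the ColumnVector contract: while noNulls is true, the contents of the isNull array are undefined and readers must not consult it. Flipping noNulls back to true would therefore require clearing the whole array, so these templates only ever move it from true to false. Reader-side, the contract looks like:

    public class NoNullsContractSketch {
      // isNull[i] is meaningful only while noNulls is false; a writer may
      // leave stale true entries behind as long as noNulls stays true.
      static boolean isEntryNull(boolean noNulls, boolean[] isNull, int i) {
        return !noNulls && isNull[i];
      }
    }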
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } } } else { - for(int i = 0; i != n; i++) { - denom = vector[i]; - outputVector[i] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } + } else { + for(int i = 0; i != n; i++) { + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; denom = vector[i]; outputVector[i] = value denom; hasDivBy0 = hasDivBy0 || (denom == 0); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { denom = vector[i]; outputVector[i] = value denom; hasDivBy0 = hasDivBy0 || (denom == 0); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt index 650101c..8158136 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,67 +72,96 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; HiveDecimalWritable[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, value, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } if (inputColVector.noNulls) { - /* Initialize output vector NULL values to false. 
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - - if (inputColVector.isRepeating) { - DecimalUtil.Checked(0, value, vector[0], outputColVector); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } } else /* there are nulls */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - - // copy isNull entry first because the operation may overwrite it - outputIsNull[i] = inputIsNull[i]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - - // copy isNull entries first because the operation may overwrite them - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result anyway. 
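The trailing comment removed above still explains the design: because all decimal math goes through checked operations, a zero divisor already yields a NULL entry, so decimal division needs no "write a safe value into null slots" pass like the primitive templates. A hypothetical checked divide in that spirit (not the real DecimalUtil signature):

    import java.math.BigDecimal;
    import java.math.MathContext;

    public class DecimalZeroDivideSketch {
      // Hypothetical checked divide: a zero divisor produces a NULL entry
      // instead of throwing, so no later "safe value" fill pass is required.
      static void checkedDivide(int i, BigDecimal numerator, BigDecimal denom,
                                BigDecimal[] out, boolean[] outIsNull, boolean[] outNoNulls) {
        if (denom.signum() == 0) {
          outIsNull[i] = true;
          outNoNulls[0] = false;
        } else {
          out[i] = numerator.divide(denom, MathContext.DECIMAL128);
        }
      }
    }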
- */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt index 1b1db54..5bcc727 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,7 +71,6 @@ public class extends VectorExpression { boolean[] nullPos2 = inputColVector2.isNull; boolean[] outNull = outputColVector.isNull; - int n = batch.size; byte[][] vector1 = inputColVector1.vector; byte[][] vector2 = inputColVector2.vector; int[] start1 = inputColVector1.start; @@ -74,20 +79,19 @@ public class extends VectorExpression { int[] length2 = inputColVector2.length; long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.noNulls = true; + + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; + // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = true; + + // TEMPORARILY: + outputColVector.reset(); + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; - ret = (vector1[0], start1[0], length1[0], + ret = (vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]); if (ret) { outVector[0] = 1; @@ -98,7 +102,7 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -107,7 +111,7 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -119,7 +123,7 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -128,7 +132,7 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -139,7 +143,7 @@ public class extends VectorExpression { } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -148,7 +152,7 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - if 
((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -156,15 +160,18 @@ public class extends VectorExpression { } } } - + // handle case where only input 2 has nulls } else if (inputColVector1.noNulls) { + + // Carefully handle NULLs... outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos2[0]; if (!nullPos2[0]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { @@ -179,19 +186,19 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { outVector[i] = 0; } - } + } } } else { for(int i = 0; i != n; i++) { outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -211,7 +218,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; outNull[i] = false; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -221,7 +228,7 @@ public class extends VectorExpression { } else { for(int i = 0; i != n; i++) { outNull[i] = false; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -235,19 +242,19 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { outVector[i] = 0; } - } + } } } else { for(int i = 0; i != n; i++) { outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -255,17 +262,20 @@ public class extends VectorExpression { } } } - } + } } - + // handle case where only input 1 has nulls } else if (inputColVector2.noNulls) { + + // Carefully handle NULLs... 
outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos1[0]; if (!nullPos1[0]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { @@ -283,7 +293,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; outNull[i] = false; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -293,7 +303,7 @@ public class extends VectorExpression { } else { for(int i = 0; i != n; i++) { outNull[i] = false; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -307,7 +317,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -319,7 +329,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -334,7 +344,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -346,7 +356,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -354,17 +364,20 @@ public class extends VectorExpression { } } } - } + } } - + // handle case where both inputs have nulls } else { + + // Carefully handle NULLs... 
outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos1[0] || nullPos2[0]; if (!outNull[0]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { @@ -382,7 +395,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -394,7 +407,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -414,7 +427,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -426,7 +439,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -441,7 +454,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos1[i] || nullPos2[i]; if (!outNull[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -453,7 +466,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos1[i] || nullPos2[i]; if (!outNull[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt index ca55834..e2cb01c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt @@ -53,31 +53,43 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] nullPos = inputColVector.isNull; boolean[] outNull = outputColVector.isNull; - int n = batch.size; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = false; + // TEMPORARILY: + outputColVector.reset(); + + // We do not need to do a column reset since we are carefully changing the output. 
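The "TEMPORARILY: outputColVector.reset()" lines fall back to a full column reset rather than the fine-grained clearing used in the other templates, presumably as a stopgap for the string comparisons. Assuming reset() wipes the null bookkeeping up front, the cost difference looks like:

    import java.util.Arrays;

    public class ResetVsLazySketch {
      // Assumed reset() behavior: clear all null bookkeeping, O(capacity)
      // work even for a small batch. The lazy approach used elsewhere
      // touches only the rows actually written.
      static void reset(boolean[] isNull, boolean[] noNulls) {
        Arrays.fill(isNull, false);
        noNulls[0] = true;
      }
    }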
+ outputColVector.isRepeating = false; + if (inputColVector.noNulls) { - outputColVector.noNulls = true; + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; + outputColVector.isRepeating = true; if ((vector[0], start[0], length[0], value, 0, value.length)) { outVector[0] = 1; } else { @@ -102,7 +114,10 @@ public abstract class extends VectorExpression { } } } else { + + // Carefully handle NULLs... outputColVector.noNulls = false; + if (inputColVector.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos[0]; diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt index ecb4d2a..ede6fd7 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt @@ -54,6 +54,13 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,22 +69,24 @@ public abstract class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos = inputColVector.isNull; boolean[] outNull = outputColVector.isNull; - int n = batch.size; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - long[] outVector = outputColVector.vector; + long[] outVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; + if (inputColVector.noNulls) { - outputColVector.noNulls = true; + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; + outputColVector.isRepeating = true; if ((value, 0, value.length, vector[0], start[0], length[0])) { outVector[0] = 1; } else { @@ -102,7 +111,10 @@ public abstract class extends VectorExpression { } } } else { + + // Carefully handle NULLs... 
outputColVector.noNulls = false; + if (inputColVector.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos[0]; diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt index a27da10..27d8a3d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt @@ -60,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,21 +80,12 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt index 9f708e2..bd780cd 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -61,6 +62,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,52 +81,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. 
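The asScratch/asScratchTimestamp and setFromScratchTimestamp calls threaded through these timestamp templates route every row through a preallocated scratch object, so the inner loop allocates nothing. The same idea standalone, with java.sql.Timestamp and illustrative names:

    import java.sql.Timestamp;

    public class ScratchReuseSketch {
      // One mutable Timestamp reused across rows, in the spirit of the
      // TimestampColumnVector scratch accessors.
      private final Timestamp scratch = new Timestamp(0);

      void addMillis(long[] epochMillis, long deltaMillis, long[] outMillis, int n) {
        for (int i = 0; i != n; i++) {
          scratch.setTime(epochMillis[i] + deltaMillis); // compute into the scratch
          outMillis[i] = scratch.getTime();              // then store the result
        }
      }
    }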
+ outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + inputColVector1.asScratch(0), value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratch(0), value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt index b3d9a4b..f894bcf 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,21 +79,12 @@ public class extends VectorExpression { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
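Every rewritten evaluate() also hoists the empty-batch check above the child-expression evaluation, so an empty batch does no child work at all rather than returning only afterwards. Schematically:

    public class EarlyReturnSketch {
      interface Expr { void evaluate(int batchSize); }

      static void evaluate(int batchSize, Expr[] children) {
        // An empty batch has no rows for the children either, so return
        // before evaluating them.
        if (batchSize == 0) {
          return;
        }
        if (children != null) {
          for (Expr child : children) {
            child.evaluate(batchSize);
          }
        }
        // ... per-row work would follow here ...
      }
    }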
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt index e49f614..cee2355 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -57,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -70,52 +78,98 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + inputColVector1.asScratchTimestamp(0), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratchTimestamp(0), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt index 95e7271..4240994 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt @@ -58,6 +58,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -72,19 +78,10 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - // Handle nulls first + /* + * Propagate null values 
for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt index 6baa72a..eff8c46 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -59,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -72,52 +79,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + inputColVector1.asScratch(0), value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratch(0), value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt index 54a1a37..7e65b9b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,21 +66,12 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; [] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt index 3bb95dd..012b240 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -54,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,61 +70,88 @@ public class extends VectorExpression { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.(0) value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.(0) value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector.(i) value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.(i) value ? 
1 : 0; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inputColVector.(0) value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector.(i) value ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector.(i) value ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt index 3db5d01..b81b805 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt @@ -55,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,20 +73,11 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt index 1ee7b11..56d422f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt @@ -18,10 +18,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,61 +73,88 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector1.noNulls; if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 
1 : 0; - } - } - } else { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt index 509f264..8e87b8e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -74,6 +75,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -87,59 +94,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
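
The hunks above and below all begin with the same repeating-input fast path, so it is worth spelling out once why the guard is written `noNulls || !isNull[0]`. The sketch below is a minimal, self-contained illustration using invented stand-in types and method names, not Hive's real ColumnVector API; it also sketches the follow-up pass (NullUtil.setNullOutputEntriesColScalar in the patch) that, by the long/NaN null-data-value convention, writes a harmless value into NULL slots:

    // Minimal stand-ins; names and fields are invented for illustration only.
    public class RepeatingScalarSketch {

      static final class Vec {
        long[] vector = new long[8];
        boolean[] isNull = new boolean[8];
        boolean noNulls = true;      // when true, isNull[] contents are meaningless
        boolean isRepeating = false;
      }

      // Repeating fast path: isNull[0] may hold stale garbage from an earlier
      // batch, so it is only consulted once the noNulls check fails.
      static void compareRepeating(Vec in, Vec out, long scalar) {
        if (in.noNulls || !in.isNull[0]) {
          out.isNull[0] = false;                     // clear a possibly stale entry
          out.vector[0] = in.vector[0] > scalar ? 1 : 0;
        } else {
          out.isNull[0] = true;
          out.noNulls = false;
        }
        out.isRepeating = true;
        setNullDataEntries(out, 1);
      }

      // Rows marked NULL still get a harmless value (1 for longs) so later
      // arithmetic over the same column cannot trip over leftover garbage.
      static void setNullDataEntries(Vec v, int n) {
        if (v.noNulls) {
          return;
        }
        for (int i = 0; i != n; i++) {
          if (v.isNull[i]) {
            v.vector[i] = 1L;
          }
        }
      }

      public static void main(String[] args) {
        Vec in = new Vec();
        in.isRepeating = true;
        in.vector[0] = 7;
        in.isNull[0] = true;   // stale garbage...
        in.noNulls = true;     // ...that noNulls tells us to ignore
        Vec out = new Vec();
        compareRepeating(in, out, 5);
        System.out.println(out.vector[0] + ", null=" + out.isNull[0]); // 1, null=false
      }
    }
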
+ outputColVector.isRepeating = false; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt index 2de3044..d01da9d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -73,6 +74,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -86,59 +93,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth2.set((int) vector2[0]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchIntervalYearMonth2.set((int) vector2[0]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
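
The deleted line just above ("we always copy over entry 0") is the heart of the bug this patch chases: output scratch columns are reused across batches, so any isNull entry the current batch does not explicitly overwrite may still carry a NULL marker from a previous batch. A minimal reproduction, again with invented stand-in types rather than Hive's real classes:

    public class ScratchReuseSketch {

      static final class Vec {
        long[] vector = new long[8];
        boolean[] isNull = new boolean[8];
        boolean noNulls = true;
        boolean isRepeating = false;
      }

      // Old style: compute values but leave isNull alone when the input has no
      // nulls. Correct for a fresh column, wrong for a reused scratch column.
      static void projectCarelessly(Vec in, Vec out, int n) {
        for (int i = 0; i != n; i++) {
          out.vector[i] = in.vector[i] + 1;
        }
      }

      public static void main(String[] args) {
        Vec scratch = new Vec();

        // Batch 1 legitimately wrote a NULL into the scratch column.
        scratch.isNull[2] = true;
        scratch.noNulls = false;

        // Batch 2 has no NULLs at all, but the stale marker survives.
        Vec in = new Vec();
        for (int i = 0; i < 4; i++) {
          in.vector[i] = i;
        }
        projectCarelessly(in, scratch, 4);

        // A reader honoring noNulls == false now sees a phantom NULL at row 2.
        System.out.println("row 2 null? " + scratch.isNull[2]); // true -- wrong
      }
    }

This is why the rewritten templates either set outputIsNull[i] = false row by row, or bulk-clear the prefix with Arrays.fill when the output column already has its noNulls flag dropped.
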
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt index 4ed80d1..24fcf14 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -71,6 +72,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -84,53 +91,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + value, inputColVector2.asScratch(0), outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratch(0), outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
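
Every rewritten branch comes in a selectedInUse twin and a dense twin, and the two clear isNull differently: with a selection vector only the live rows may be touched, so the flag is cleared inside the loop; without one, rows 0..n are exactly the live rows and a bulk Arrays.fill is safe and cheaper. A compilable sketch of just that split, with invented stand-ins as before:

    import java.util.Arrays;

    public class SelectionLoopSketch {

      static final class Vec {
        long[] vector = new long[1024];
        boolean[] isNull = new boolean[1024];
        boolean noNulls = true;
        boolean isRepeating = false;
      }

      static void addScalar(Vec in, Vec out, long scalar,
          boolean selectedInUse, int[] sel, int n) {
        if (selectedInUse) {
          // Only rows named in sel[0..n) are live; clearing isNull for any
          // other row could hide a NULL that a filtered-out row still owns.
          for (int j = 0; j != n; j++) {
            int i = sel[j];
            out.isNull[i] = false;
            out.vector[i] = in.vector[i] + scalar;
          }
        } else {
          // Dense batch: a bulk clear of the prefix avoids the per-row
          // isNull store in the hot loop.
          Arrays.fill(out.isNull, 0, n, false);
          for (int i = 0; i != n; i++) {
            out.vector[i] = in.vector[i] + scalar;
          }
        }
      }

      public static void main(String[] args) {
        Vec in = new Vec();
        Vec out = new Vec();
        in.vector[5] = 10;
        addScalar(in, out, 3, true, new int[] {5}, 1);
        System.out.println(out.vector[5]); // 13
      }
    }
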
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt index 6cca0bb..1987ceb 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt @@ -18,10 +18,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -57,6 +59,12 @@ 
public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,61 +75,90 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector2.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector2.noNulls; if (inputColVector2.noNulls) { - if (inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; - } - } - } else { - if (inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 
1 : 0;
-            outNulls[i] = false;
           } else {
-            //comparison with null is null
-            outNulls[i] = true;
+            // Comparison with NULL is NULL.
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
       } else {
-        System.arraycopy(nullPos, 0, outNulls, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!nullPos[i]) {
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
             outputVector[i] = inputColVector2.compareTo(value, i) <OperatorSymbol> 0 ? 1 : 0;
+          } else {
+            // Comparison with NULL is NULL.
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
       }
diff --git ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt
index 4ab3e76..3c8f8822 100644
--- ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt
+++ ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt
@@ -48,9 +48,13 @@
         || inputColumnVector1.isRepeating && inputColumnVector2.isRepeating,
         outputColumnVector.isRepeating);
+    /*
+    We no longer set noNulls to the input ColumnVector's value since that doesn't work
+    for scratch column reuse.
assertEquals( "Output column vector no nulls state does not match operand column", inputColumnVector.noNulls, outputColumnVector.noNulls); + */ if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) { for(int i = 0; i < BATCH_SIZE; i++) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt index 733731f..8bce425 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt @@ -320,7 +320,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt index 6e42598..40f8cd2 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt @@ -359,7 +359,7 @@ public class extends VectorAggregateExpression { HiveDecimalWritable[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum.setFromLong(0L); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt index d5325c3..e4e5baa 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt @@ -371,7 +371,7 @@ public class extends VectorAggregateExpression { long[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { final long value = vector[0]; for (int i = 0; i < batchSize; i++) { myagg.avgValue(value); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt index 8ab393c..0a71a21 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt @@ -368,7 +368,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputStructColVector.isRepeating) { - if (inputStructColVector.noNulls) { + if (inputStructColVector.noNulls || !inputStructColVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.mergeSum.setFromLong(0L); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt index be2fadd..a3c07a0 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt @@ -334,7 +334,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputStructColVector.isRepeating) { - if (inputStructColVector.noNulls) { + if (inputStructColVector.noNulls || !inputStructColVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.mergeCount = 0; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt index 6190a9e..576f7ec 100644 --- 
ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt @@ -317,7 +317,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt index fd54256..2df45bb 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt @@ -314,7 +314,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { myagg.minmaxValue(vector[0]); } return; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt index 4764a45..9fe85d3 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt @@ -318,7 +318,7 @@ public class extends VectorAggregateExpression { HiveDecimalWritable[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls && + if ((inputVector.noNulls || !inputVector.isNull[0]) && (myagg.isNull || (myagg.value.compareTo(vector[0]) 0))) { myagg.isNull = false; myagg.value.set(vector[0]); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt index 4680161..9a0a6e7 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt @@ -307,7 +307,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls && + if ((inputColVector.noNulls || !inputColVector.isNull[0]) && (myagg.isNull || (inputColVector.compareTo(myagg.value, 0) 0))) { myagg.isNull = false; inputColVector.intervalDayTimeUpdate(myagg.value, 0); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt index 027688d..4f0b5a5 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt @@ -278,7 +278,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { + if (inputColumn.noNulls || !inputColumn.isNull[0]) { myagg.checkValue(inputColumn.vector[0], inputColumn.start[0], inputColumn.length[0]); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt index 370b6a8..579437e 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt @@ -309,7 +309,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls && + if ((inputColVector.noNulls || !inputColVector.isNull[0]) && 
(myagg.isNull || (inputColVector.compareTo(myagg.value, 0) 0))) { myagg.isNull = false; inputColVector.timestampUpdate(myagg.value, 0); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt index 3e3d070..c731869 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt @@ -311,7 +311,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt index cb9c962..876ead5 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt @@ -311,7 +311,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { iterateRepeatingNoNulls(myagg, vector[0], batchSize); } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt index 3d03c09..cf19b14 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt @@ -311,7 +311,7 @@ public class extends VectorAggregateExpression { HiveDecimalWritable[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { iterateRepeatingNoNulls(myagg, vector[0], inputVector.scale, batchSize); } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt index 570d771..9b1c1cd 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt @@ -371,7 +371,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputStructColVector.isRepeating) { - if (inputStructColVector.noNulls) { + if (inputStructColVector.noNulls || !inputStructColVector.isNull[0]) { final long count = countVector[0]; final double sum = sumVector[0]; final double variance = varianceVector[0]; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt index d6cd505..1dd5ab4 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt @@ -290,7 +290,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { iterateRepeatingNoNulls(myagg, inputColVector.getDouble(0), batchSize); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index 5db3b07..bedc12a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -210,12 +210,14 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, 
VectorizedRowBatch outBa if (inColVector.isRepeating) { if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.isNull[outBatchIndex] = false; outColVector.setElement(outBatchIndex, 0, inColVector); } else { VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); } } else { if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.isNull[outBatchIndex] = false; outColVector.setElement(outBatchIndex, inBatchIndex, inColVector); } else { VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); @@ -237,12 +239,14 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBa if (inColVector.isRepeating) { if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.isNull[outBatchIndex] = false; outColVector.setElement(outBatchIndex, 0, inColVector); } else { VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); } } else { if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.isNull[outBatchIndex] = false; outColVector.setElement(outBatchIndex, inBatchIndex, inColVector); } else { VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java index 5d1db4d..6ae6727 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java @@ -150,7 +150,7 @@ public void copyGroupKey(VectorizedRowBatch inputBatch, VectorizedRowBatch outpu TimestampColumnVector inputColumnVector = (TimestampColumnVector) inputBatch.cols[inputColumnNum]; TimestampColumnVector outputColumnVector = (TimestampColumnVector) outputBatch.cols[outputColumnNum]; if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { - + outputColumnVector.isNull[outputBatch.size] = false; outputColumnVector.setElement(outputBatch.size, 0, inputColumnVector); } else { outputColumnVector.noNulls = false; @@ -163,7 +163,7 @@ public void copyGroupKey(VectorizedRowBatch inputBatch, VectorizedRowBatch outpu IntervalDayTimeColumnVector inputColumnVector = (IntervalDayTimeColumnVector) inputBatch.cols[inputColumnNum]; IntervalDayTimeColumnVector outputColumnVector = (IntervalDayTimeColumnVector) outputBatch.cols[outputColumnNum]; if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { - + outputColumnVector.isNull[outputBatch.size] = false; outputColumnVector.setElement(outputBatch.size, 0, inputColumnVector); } else { outputColumnVector.noNulls = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 8264e8a..dfa79e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2936,6 +2936,20 @@ private VectorExpression getWhenExpression(List childExpr, childExpr.subList(2, childExpr.size())); } + if (isNullConst(thenDesc) && isNullConst(elseDesc)) { + + // THEN NULL ELSE NULL: An unusual "case", but possible. 
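
IfExprNullNull itself is not shown in this diff, so the sketch below is only an assumption about the contract such an expression would have to satisfy: whatever the WHEN condition evaluates to, every output row is NULL, which a vectorized expression can express as a single repeating NULL entry without evaluating any children. Stand-in types as in the earlier sketches:

    public class IfExprNullNullSketch {

      static final class Vec {
        long[] vector = new long[8];
        boolean[] isNull = new boolean[8];
        boolean noNulls = true;
        boolean isRepeating = false;
      }

      // Assumed contract of an all-NULL constant expression: one repeating
      // NULL entry covers the whole batch.
      static void evaluateNullNull(Vec out) {
        out.isRepeating = true;
        out.isNull[0] = true;
        out.noNulls = false;
        out.vector[0] = 1L; // harmless data value convention for long vectors
      }

      public static void main(String[] args) {
        Vec out = new Vec();
        evaluateNullNull(out);
        System.out.println(out.isRepeating + " " + out.isNull[0]); // true true
      }
    }
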
+ final int outputColumnNum = ocm.allocateOutputColumn(returnType); + + final VectorExpression resultExpr = + new IfExprNullNull( + outputColumnNum); + + resultExpr.setOutputTypeInfo(returnType); + resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + return resultExpr; + } if (isNullConst(thenDesc)) { final VectorExpression whenExpr = getVectorExpression(whenDesc, mode); final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 66de847..44b7c95 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -97,19 +97,6 @@ public static void setNullColIsNullValue(ColumnVector cv, int rowIndex) { } /** - * Iterates thru all the column vectors and sets noNull to - * specified value. - * - * @param batch - * Batch on which noNull is set - */ - public static void setNoNullFields(VectorizedRowBatch batch) { - for (int i = 0; i < batch.numCols; i++) { - batch.cols[i].noNulls = true; - } - } - - /** * Iterates thru all the column vectors and sets repeating to * specified column. * diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 0e703a5..ff55f50 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -423,7 +423,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Boolean) value == true ? 
1 : 0); - lcv.isNull[0] = false; } } break; @@ -436,7 +435,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Byte) value); - lcv.isNull[0] = false; } } break; @@ -449,7 +447,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Short) value); - lcv.isNull[0] = false; } } break; @@ -462,7 +459,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Integer) value); - lcv.isNull[0] = false; } } break; @@ -475,7 +471,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Long) value); - lcv.isNull[0] = false; } } break; @@ -488,7 +483,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill(DateWritable.dateToDays((Date) value)); - lcv.isNull[0] = false; } } break; @@ -501,7 +495,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Timestamp) value); - lcv.isNull[0] = false; } } break; @@ -514,7 +507,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill(((HiveIntervalYearMonth) value).getTotalMonths()); - lcv.isNull[0] = false; } } @@ -526,7 +518,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue icv.isRepeating = true; } else { icv.fill(((HiveIntervalDayTime) value)); - icv.isNull[0] = false; } } @@ -538,7 +529,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue dcv.isRepeating = true; } else { dcv.fill((Float) value); - dcv.isNull[0] = false; } } break; @@ -551,7 +541,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue dcv.isRepeating = true; } else { dcv.fill((Double) value); - dcv.isNull[0] = false; } } break; @@ -563,10 +552,7 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue dv.isNull[0] = true; dv.isRepeating = true; } else { - HiveDecimal hd = (HiveDecimal) value; - dv.set(0, hd); - dv.isRepeating = true; - dv.isNull[0] = false; + dv.fill((HiveDecimal) value); } } break; @@ -580,7 +566,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue bcv.isRepeating = true; } else { bcv.fill(bytes); - bcv.isNull[0] = false; } } break; @@ -595,8 +580,7 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue bcv.isNull[0] = true; bcv.isRepeating = true; } else { - bcv.setVal(0, sVal.getBytes()); - bcv.isRepeating = true; + bcv.fill(sVal.getBytes()); } } break; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java index c15bdc1..9ca2dbe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,7 +60,6 @@ public void 
evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -67,39 +68,53 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setDays(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; + setDays(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; setDays(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setDays(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setDays(outputColVector, vector, i); + // Set isNull before call in case it changes its mind. outputIsNull[i] = inputIsNull[i]; + setDays(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { setDays(outputColVector, vector, i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java index a2e4a52..778aea4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -75,57 +77,75 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + outV.isRepeating = true; + if (inV.noNulls || !inV.isNull[0]) { + // Set isNull before call in case it changes its mind.
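The recurring "set isNull before call in case it changes its mind" comment exists because some per-row writers can themselves declare the row NULL, e.g. DecimalColumnVector.set nulls out a value that does not fit the output precision/scale. The expression therefore clears isNull optimistically and lets the setter overrule it. A hedged sketch of the pattern for one row, assuming that overflow behavior of set():

import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;

public class SetIsNullFirstSketch {
  // One row of the "set isNull before the call" pattern.
  static void convertRow(DecimalColumnVector inV, DecimalColumnVector outV, int i) {
    // Optimistically mark the row as present...
    outV.isNull[i] = false;
    // ...then let set() change its mind if the value cannot be represented
    // at the output precision/scale.
    outV.set(i, inV.vector[i]);
    if (outV.isNull[i]) {
      outV.noNulls = false; // the setter produced a NULL after all
    }
  }
}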
+ outV.isNull[0] = false; convert(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; convert(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { convert(outV, inV, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - convert(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; convert(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; convert(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java index aa529ed..7ad0493 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java @@ -64,6 +64,7 @@ protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { outV.noNulls = false; return; } + outV.isNull[i] = false; switch (integerPrimitiveCategory) { case BYTE: outV.vector[i] = decWritable.byteValue(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java index 08abf27..956092a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] vector = inputColVector.vector; @@ -66,36 +67,58 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not 
change. - setDouble(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + setDouble(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; setDouble(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setDouble(outputColVector, vector, i); } } outputColVector.isRepeating = false; } else /* there are nulls */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setDouble(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - setDouble(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } outputColVector.isRepeating = false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java index df25eac..759c55c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -65,39 +66,64 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setSeconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; + setSeconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; setSeconds(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setSeconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setSeconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + setSeconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before calls in case they change their mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + outputColVector.noNulls = false; for(int i = 0; i != n; i++) { - setSeconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + setSeconds(outputColVector, vector, i); + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java index 42c34c8..51f340e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -54,7 +56,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -63,39 +64,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setMilliseconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind.
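Every rewritten evaluate() splits the flat-vector work the same way: with batch.selectedInUse only the indices in batch.selected are live and isNull must be written per index, while the dense path can clear or copy the null flags in bulk (Arrays.fill, System.arraycopy) before a tight loop. A generic sketch, with process() standing in for setSeconds/setMilliseconds/etc.:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class SelectedVsDenseSketch {
  static void forEachLiveRow(VectorizedRowBatch batch, LongColumnVector outV) {
    final int n = batch.size;
    if (batch.selectedInUse) {
      // Sparse: only the selected indices are live rows.
      final int[] sel = batch.selected;
      for (int j = 0; j != n; j++) {
        final int i = sel[j];
        outV.isNull[i] = false;
        process(outV, i);
      }
    } else {
      // Dense: bulk-clear the null flags, then run a branch-free loop.
      Arrays.fill(outV.isNull, 0, n, false);
      for (int i = 0; i != n; i++) {
        process(outV, i);
      }
    }
  }

  private static void process(LongColumnVector outV, int i) {
    outV.vector[i] = 0; // placeholder for the real per-row kernel
  }
}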
+ outputIsNull[0] = false; + setMilliseconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; setMilliseconds(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setMilliseconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setMilliseconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before calls in case they change their mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - setMilliseconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index 34269da..27eeb68 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -28,6 +28,7 @@ import org.apache.hive.common.util.DateParser; import java.nio.charset.StandardCharsets; +import java.util.Arrays; /** * Casts a string vector to a date vector. @@ -64,57 +65,74 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; evaluate(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { evaluate(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; evaluate(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; evaluate(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java index 41443c5..bfc9953 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; @@ -81,46 +83,64 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. 
- outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } outV.isRepeating = false; @@ -128,7 +148,12 @@ public void evaluate(VectorizedRowBatch batch) { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java index 3ea1e8c..7667e12 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -85,57 +87,77 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. 
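The discipline behind all of these hunks is the ColumnVector contract: when noNulls is true a reader may ignore isNull[] entirely, so a writer must flip noNulls to false the moment it records its first NULL row, and must never leave a stale true in isNull[] behind a noNulls == true flag. The reader side of that contract, sketched:

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

public class NoNullsContractSketch {
  // How a consumer decides whether row i carries a value.
  static boolean hasValue(ColumnVector cv, int i) {
    final int row = cv.isRepeating ? 0 : i;
    // isNull[] is only meaningful when noNulls is false.
    return cv.noNulls || !cv.isNull[row];
  }
}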
+ outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java index feb0ab6..1228cc7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -60,57 +62,74 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; IntervalDayTimeColumnVector outV = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; evaluate(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { evaluate(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { evaluate(outV, inV, i); } } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { evaluate(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java index 09dd4d9..fdbb18b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -58,57 +60,82 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; evaluate(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { evaluate(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; evaluate(outV, inV, i); + } else { + outputIsNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. 
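Handling a repeating input first and returning lets every later branch assume a flat vector, which is why these rewrites can clear outV.isRepeating once up front (the "no column reset needed" comments) instead of re-clearing it at the end of each branch. The common shape, sketched with an illustrative computeRow kernel:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class RepeatingFastPathSketch {
  static void evaluate(LongColumnVector inV, LongColumnVector outV, int n) {
    if (n == 0) {
      return;
    }
    outV.isRepeating = false; // flat unless the fast path below says otherwise
    if (inV.isRepeating) {
      if (inV.noNulls || !inV.isNull[0]) {
        outV.isNull[0] = false;
        outV.vector[0] = computeRow(inV.vector[0]);
      } else {
        outV.isNull[0] = true;
        outV.noNulls = false;
      }
      outV.isRepeating = true;
      return; // entry 0 now stands for the whole batch
    }
    // ... flat noNulls / has-nulls branches follow, as in the hunks above.
  }

  private static long computeRow(long v) {
    return v; // placeholder kernel
  }
}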
+ outputIsNull[i] = false; + evaluate(outV, inV, i); + } else { + outputIsNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java index a6cfee8..7eb07a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -183,57 +185,86 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; if (inV.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { + if (!inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outV.isNull[0] = false; func(outV, inV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes its mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes its mind.
+ outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java index 1231cda..7869999 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -65,39 +66,51 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = toBool(inputColVector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = toBool(inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = toBool(inputColVector, i); + outputIsNull[i] = false; + outputVector[i] = toBool(inputColVector, i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = toBool(inputColVector, i); outputIsNull[i] = inputIsNull[i]; + outputVector[i] = toBool(inputColVector, i); } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java index e696455..4af823e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -51,7 +53,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] outputVector = outputColVector.vector; @@ -60,39 +61,62 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.getDouble(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getDouble(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inputColVector.getDouble(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getDouble(i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getDouble(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getDouble(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java index 36b9f13..8eb7cb3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -48,7 +50,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -57,39 +58,63 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.getTimestampAsLong(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getTimestampAsLong(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inputColVector.getTimestampAsLong(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getTimestampAsLong(i); } } outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getTimestampAsLong(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + inputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + inputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getTimestampAsLong(i); + if (!inputIsNull[i]) { + inputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + inputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java index 127e431..6fb29a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -66,104 +68,121 @@ public void evaluate(VectorizedRowBatch batch) { return; } + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + long vector1Value = vector1[0]; long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { + if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = vector1[0] & vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1Value & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2Value; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { + return; + } + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outV.noNulls = false; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; + outputIsNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } else { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } - outV.noNulls = false; } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { // only input 1 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -171,49 +190,46 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. 
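The "we don't want the conditional statements inside the for loop" remark is about branch specialization: instead of one loop that re-tests noNulls and isRepeating per row, the method enumerates the input combinations and gives each its own straight-line loop. A condensed sketch of the trade-off:

// Condensed sketch: hoisting batch-invariant tests out of the row loop.
public class LoopSpecializationSketch {
  // Specialized: no per-row null/repeating tests; easy for the JIT to unroll.
  static void andNoNulls(long[] v1, long[] v2, long[] out, int n) {
    for (int i = 0; i != n; i++) {
      out[i] = v1[i] & v2[i];
    }
  }

  // Unspecialized: the per-row branching the specialized loops avoid.
  static void andGeneric(long[] v1, boolean[] null1, long[] v2, boolean[] null2,
      long[] out, boolean[] outNull, int n) {
    for (int i = 0; i != n; i++) {
      outNull[i] = null1[i] || null2[i]; // simplified; see the Kleene rules below
      out[i] = v1[i] & v2[i];
    }
  }
}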
outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); + outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } - outV.isRepeating = false; } outV.noNulls = false; } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ @@ -223,7 +239,7 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. 
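The isNull expressions here encode three-valued (Kleene) AND: FALSE AND NULL is FALSE, while TRUE AND NULL and NULL AND NULL are NULL, so the result is null only when one side is 1 with the other side null, or both sides are null. ColOrCol below mirrors this with 0 for OR. A self-checking restatement of the rule (illustrative; run with -ea):

// Kleene three-valued AND over {0,1} values plus a per-operand null flag.
public class ThreeValuedAndSketch {
  static boolean resultIsNull(long v1, boolean null1, long v2, boolean null2) {
    // Same shape as the patch: NULL survives only if the other side cannot decide.
    return (v1 == 1 && null2) || (null1 && v2 == 1) || (null1 && null2);
  }

  public static void main(String[] args) {
    assert !resultIsNull(0, false, 0, true); // FALSE AND NULL -> FALSE
    assert resultIsNull(1, false, 0, true);  // TRUE AND NULL -> NULL
    assert resultIsNull(0, true, 0, true);   // NULL AND NULL -> NULL
  }
}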
outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) + outputIsNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { @@ -231,32 +247,31 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } @@ -267,21 +282,19 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } - outV.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java index 3542a07..9208cd4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -69,104 +71,120 @@ public void evaluate(VectorizedRowBatch batch) { return; } + boolean[] outputIsNull = outV.isNull; + + // We 
do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + long vector1Value = vector1[0]; long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { + // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = vector1[0] | vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1Value | vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] | vector2Value; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] | vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { + return; + } + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outV.noNulls = false; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. 
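A note on the null rule used in the branch below: the logic follows three-valued (Kleene) OR, where TRUE | NULL is TRUE. When only the second input can be null, the result is therefore null exactly when the known side is false. A minimal standalone sketch (hypothetical helper, not part of the patch):

    // 1 | NULL = 1 (known), but 0 | NULL = NULL (unknown).
    static boolean orIsNullRightNullable(long left, boolean rightIsNull) {
      return (left == 0) && rightIsNull;
    }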
outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; + outputIsNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } - outV.noNulls = false; } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { // only input 1 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -174,51 +192,47 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. 
outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); + outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } - outV.isRepeating = false; } - outV.noNulls = false; } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ // either input 1 or input 2 may have nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -226,7 +240,7 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. 
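When both inputs may be null, the rule needs all three disjuncts, as the case that follows shows. A sketch of the per-row predicate, assuming Hive's convention that null entries still carry a well-defined long payload:

    // The OR result is non-null only if some operand is a known TRUE
    // or both operands are known.
    static boolean orIsNull(long v1, boolean isNull1, long v2, boolean isNull2) {
      return (v1 == 0 && isNull2) || (isNull1 && v2 == 0) || (isNull1 && isNull2);
    }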
outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) + outputIsNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { @@ -234,57 +248,53 @@ for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } - outV.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index c7cab2a..5b89131 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -121,70 +121,84 @@ public ConstantVectorExpression(int outputColumnNum, TypeInfo outputTypeInfo, bo isNullValue = isNull; } + /* + * In the following evaluate* methods, since we are supporting scratch column reuse, we must + * assume the column may have noNulls of false and some isNull entries true. + * + * So, do proper assignments.
+ */ + private void evaluateLong(VectorizedRowBatch vrg) { + LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { + cv.isNull[0] = false; cv.vector[0] = longValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDouble(VectorizedRowBatch vrg) { DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { + cv.isNull[0] = false; cv.vector[0] = doubleValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateBytes(VectorizedRowBatch vrg) { BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; cv.initBuffer(); if (!isNullValue) { + cv.isNull[0] = false; cv.setVal(0, bytesValue, 0, bytesValueLength); } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDecimal(VectorizedRowBatch vrg) { DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.vector[0].set(decimalValue); + dcv.isNull[0] = false; + dcv.set(0, decimalValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } private void evaluateTimestamp(VectorizedRowBatch vrg) { - TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; - dcv.isRepeating = true; - dcv.noNulls = !isNullValue; + TimestampColumnVector tcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; + tcv.isRepeating = true; if (!isNullValue) { - dcv.set(0, timestampValue); + tcv.isNull[0] = false; + tcv.set(0, timestampValue); } else { - dcv.isNull[0] = true; + tcv.isNull[0] = true; + tcv.noNulls = false; } } private void evaluateIntervalDayTime(VectorizedRowBatch vrg) { IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { + dcv.isNull[0] = false; dcv.set(0, intervalDayTimeValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java index d91b09c..7342d9e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java @@ -82,12 +82,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java index 2699681..89df3bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -73,7 +74,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; outputColVector.isRepeating = inputColVector1.isRepeating; int n = batch.size; long[] vector1 = inputColVector1.vector; @@ -83,43 +83,62 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector1.isRepeating) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); - outputColVector.setFromScratchIntervalDayTime(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(0); + + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } + + if (inputColVector1.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
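In the nulls branch of these column-scalar date expressions, the input null mask is copied to the output and the subtraction is still performed for every row: for primitive results the payload of a null row is harmless, and skipping the work would put a conditional back into the inner loop. Condensed shape, with hypothetical long arithmetic standing in for the timestamp math:

    outputColVector.noNulls = false;
    System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
    for (int i = 0; i != n; i++) {
      outputVector[i] = vector[i] - scalarValue;  // null rows computed anyway
    }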
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java index 946b738..7d84b70 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -75,7 +76,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; outputColVector.isRepeating = inputColVector2.isRepeating; int n = batch.size; @@ -86,43 +86,61 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector2.isRepeating) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); - outputColVector.setFromScratchIntervalDayTime(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
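The removed "always copy over entry 0" approach gives way to a repeating-input fast path that resolves the whole batch up front. The recurring skeleton used throughout this patch (computeRow0 is a hypothetical stand-in for the per-expression work):

    if (inputColVector.isRepeating) {
      if (inputColVector.noNulls || !inputIsNull[0]) {
        outputIsNull[0] = false;
        outputVector[0] = computeRow0();
      } else {
        outputIsNull[0] = true;
        outputColVector.noNulls = false;
      }
      outputColVector.isRepeating = true;
      return;
    }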
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } + + if (inputColVector2.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java index 9a8177c..41d279f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import java.sql.Timestamp; @@ -80,8 +81,8 @@ public void evaluate(VectorizedRowBatch batch) { DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColumnVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColumnVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; HiveDecimalWritable[] vector = inputColumnVector.vector; long[] outputVector = outputColVector.vector; @@ -91,49 +92,51 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are 
carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColumnVector.noNulls; - if (inputColumnVector.noNulls) { - if (inputColumnVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColumnVector.isRepeating) { + if (inputColumnVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = inSet.contains(vector[0]) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColumnVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } } else { - if (inputColumnVector.isRepeating) { - - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.contains(vector[0]) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java index 791d8f2..7a0ee86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,40 +60,52 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; outV.initBuffer(); + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { //Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -99,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -107,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java index ba83b6a..33911fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java @@ -65,8 +65,8 @@ public void evaluate(VectorizedRowBatch batch) { DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; double[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -76,49 +76,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.lookup(vector[i]) ? 
1 : 0; } } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java index c8b1dad..ec9ab01 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java @@ -58,19 +58,34 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector = inputColVector.vector; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; outV.initBuffer(); + boolean[] inputIsNull = inputColVector.isNull; if (n == 0) { //Nothing to do return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, vector, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; func(outV, vector, i); @@ -82,18 +97,13 @@ public void evaluate(VectorizedRowBatch batch) { } outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { // Handle case with nulls. Don't do function if the value is null, // because the data may be undefined for a null value. 
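By contrast with the date arithmetic above, expressions that materialize their results through func() must skip null rows in the nulls branch: the input payload there may be undefined (for example an uninitialized byte buffer), as the retained comment notes. The guarded loop, in outline:

    outV.noNulls = false;
    for (int i = 0; i != n; i++) {
      outV.isNull[i] = inputColVector.isNull[i];
      if (!inputColVector.isNull[i]) {
        func(outV, vector, i);  // only touch rows with defined data
      }
    }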
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, vector, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; @@ -101,7 +111,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, i); } } - outV.isRepeating = false; } else { System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -109,7 +118,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java index 1c1bc0b..252a816 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java @@ -77,73 +77,73 @@ public DynamicValueVectorExpression(int outputColumnNum, TypeInfo typeInfo, private void evaluateLong(VectorizedRowBatch vrg) { LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { - cv.vector[0] = longValue; cv.isNull[0] = false; + cv.vector[0] = longValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDouble(VectorizedRowBatch vrg) { DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { - cv.vector[0] = doubleValue; cv.isNull[0] = false; + cv.vector[0] = doubleValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateBytes(VectorizedRowBatch vrg) { BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; cv.initBuffer(); if (!isNullValue) { - cv.setVal(0, bytesValue, 0, bytesValueLength); cv.isNull[0] = false; + cv.setVal(0, bytesValue, 0, bytesValueLength); } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDecimal(VectorizedRowBatch vrg) { DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.vector[0].set(decimalValue); dcv.isNull[0] = false; + dcv.set(0, decimalValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } private void evaluateTimestamp(VectorizedRowBatch vrg) { TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.set(0, timestampValue); dcv.isNull[0] = false; + dcv.set(0, timestampValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } private void evaluateIntervalDayTime(VectorizedRowBatch vrg) { IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.set(0, intervalDayTimeValue); dcv.isNull[0] = false; + dcv.set(0, intervalDayTimeValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java 
index 28d800e..7c06697 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,41 +59,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs...
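Note the two ways the noNulls path clears the output mask: with a selection vector in use only the selected rows may be touched, so isNull is cleared row by row; without one, the whole prefix [0, n) is written, so a bulk Arrays.fill keeps the compute loop branch-free. In outline:

    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outputIsNull[i] = false;                 // per selected row only
        func(outV, inV, i);
      }
    } else {
      Arrays.fill(outputIsNull, 0, n, false);    // bulk clear, then a tight loop
      for (int i = 0; i != n; i++) {
        func(outV, inV, i);
      }
    }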
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -99,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -112,7 +126,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java index 5fb9778..5480949 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -70,41 +72,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs...
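The "set isNull before call in case it changes its mind" comments encode an ordering contract: the callee may null a row after the fact, for instance when a decimal value does not fit in a long. So the caller clears the flag optimistically and lets the call override it:

    // Order matters: clear the flag first, then let the conversion veto it.
    outputIsNull[i] = false;
    func(outV, inV, i);  // may set outV.isNull[i] = true and outV.noNulls = false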
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -112,7 +126,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -120,7 +133,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java index f518f39..92602dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,41 +60,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -100,7 +114,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -108,7 +121,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java index e632ff9..bde61b9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,41 +59,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
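The defensive style in all of these methods follows from the scratch-column-reuse comment added to ConstantVectorExpression: an evaluate() may receive an output column whose previous occupant left noNulls false and scattered isNull entries. Illustrative worst-case starting state (hypothetical values):

    // A recycled scratch column may arrive like this:
    //   outV.noNulls     == false                          (stale)
    //   outV.isRepeating == true                           (stale)
    //   outV.isNull      == { false, true, true, false }   (stale garbage)
    // so the new code assigns isRepeating and isNull explicitly rather than
    // trusting the prior state.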
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -99,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -107,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java index d500612..fd8d148 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,41 +59,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -99,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -107,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java index f93dbfc..4a35a54 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -73,55 +74,66 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; outV.initBuffer(); + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { //Nothing to do return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; prepareResult(0, vector, outV); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; prepareResult(i, vector, outV); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { prepareResult(i, vector, outV); } - outV.isRepeating = false; } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
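Each rewritten method also front-loads the isRepeating bookkeeping: the flag is pinned to false immediately (no full column reset is needed, since every written row is accounted for), and only the repeating fast path flips it back before returning. In outline:

    outV.isRepeating = false;        // assume a non-repeating output
    if (inputColVector.isRepeating) {
      /* evaluate entry 0 only */
      outV.isRepeating = true;       // the one case that repeats
      return;
    }
    // every remaining path leaves isRepeating == false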
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - prepareResult(0, vector, outV); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { prepareResult(i, vector, outV); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { prepareResult(i, vector, outV); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java index 1a94408..bdbf4d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java @@ -55,9 +55,12 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; double[] outputVector = outputColVector.vector; - outputColVector.noNulls = true; outputColVector.isRepeating = false; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // return immediately if batch is empty if (n == 0) { return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java index d289dff..96ce35d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java @@ -55,9 +55,12 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; double[] outputVector = outputColVector.vector; - outputColVector.noNulls = true; outputColVector.isRepeating = false; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // return immediately if batch is empty if (n == 0) { return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java index ff8593e..3f1a8be 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java @@ -59,7 +59,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; HiveDecimalWritable[] vector = inputColVector.vector; @@ -68,14 +67,23 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // All must be selected otherwise size would be zero - // Repeating property will not change. 
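FuncRand and FuncRandNoSeed above get the same treatment even though rand() never yields NULL: the blanket noNulls = true they used to set could mask stale state in a reused scratch column. A sketch of the presumed fill loop, assuming the (unshown) body mirrors the other expressions in this patch:

    for (int i = 0; i != n; i++) {
      outputIsNull[i] = false;             // clear any stale null flag
      outputVector[i] = random.nextDouble();
    }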
- outputIsNull[0] = inputIsNull[0]; - round(0, vector[0], decimalPlaces, outputColVector); + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + round(0, vector[0], decimalPlaces, outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -92,8 +100,11 @@ public void evaluate(VectorizedRowBatch batch) { round(i, vector[i], decimalPlaces, outputColVector); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -106,7 +117,6 @@ public void evaluate(VectorizedRowBatch batch) { round(i, vector[i], decimalPlaces, outputColVector); } } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java index d474ff0..ead44eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -54,40 +56,52 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputCol]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { //Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
       outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+
+      if (batch.selectedInUse) {
         for (int j = 0; j != n; j++) {
           int i = sel[j];
           outV.isNull[i] = inV.isNull[i];
@@ -103,7 +117,6 @@ public void evaluate(VectorizedRowBatch batch) {
           func(outV, inV, i);
         }
       }
-      outV.isRepeating = false;
     }
   }
 }

diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
index 93cf1ec..cc97889 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -57,6 +59,8 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int n = batch.size;
     DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+    boolean[] inputIsNull = inV.isNull;
+    boolean[] outputIsNull = outV.isNull;

     if (n == 0) {
@@ -64,35 +68,43 @@
       return;
     }

-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outV.isRepeating = false;
+
+    if (inV.isRepeating) {
+      if (inV.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
+      } else {
+        outputIsNull[0] = true;
+        outV.noNulls = false;
+      }
+      outV.isRepeating = true;
+      return;
+    }
+
+    if (inV.noNulls) {
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+          // Set isNull before call in case it changes its mind.
+          outputIsNull[i] = false;
           func(outV, inV, i);
         }
-        outV.isRepeating = false;
       } else {
+        // Set isNull before calls in case they change their mind.
+        Arrays.fill(outputIsNull, 0, n, false);
         for(int i = 0; i != n; i++) {
           func(outV, inV, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
+    } else /* there are nulls in the inputColVector */ {

-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
+      // Carefully handle NULLs...
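
The new isRepeating fast path evaluates a repeating input exactly once, marks the output repeating, and returns, instead of walking all n rows the way the old structure could. A toy demonstration of the saving (plain arrays and a call counter, not the real vector classes):

class RepeatingFastPath {
  static int calls = 0;

  static void func(long[] out, long[] in, int i) {
    calls++;
    out[i] = in[i] + 1;
  }

  public static void main(String[] args) {
    int n = 1024;
    long[] in = new long[n];
    long[] out = new long[n];
    boolean inIsRepeating = true;   // every row equals row 0
    boolean outIsRepeating = false;

    if (inIsRepeating) {
      func(out, in, 0);             // one call covers all n rows
      outIsRepeating = true;
    } else {
      for (int i = 0; i < n; i++) {
        func(out, in, i);
      }
    }
    System.out.println(calls + " call(s) for " + n + " rows, isRepeating=" + outIsRepeating);
  }
}
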
       outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
           outV.isNull[i] = inV.isNull[i];
@@ -100,7 +112,6 @@ public void evaluate(VectorizedRowBatch batch) {
           func(outV, inV, i);
         }
       }
-      outV.isRepeating = false;
     } else {
       System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
       for(int i = 0; i != n; i++) {
@@ -108,7 +119,6 @@ public void evaluate(VectorizedRowBatch batch) {
           func(outV, inV, i);
         }
       }
-      outV.isRepeating = false;
     }
   }
 }

diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
index 9eb4312..2cd314b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
@@ -58,6 +60,8 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int n = batch.size;
     LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    boolean[] inputIsNull = inV.isNull;
+    boolean[] outputIsNull = outV.isNull;

     if (n == 0) {
@@ -65,35 +69,43 @@
       return;
     }

-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outV.isRepeating = false;
+
+    if (inV.isRepeating) {
+      if (inV.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
+      } else {
+        outputIsNull[0] = true;
+        outV.noNulls = false;
+      }
+      outV.isRepeating = true;
+      return;
+    }
+
+    if (inV.noNulls) {
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+          // Set isNull before call in case it changes its mind.
+          outputIsNull[i] = false;
           func(outV, inV, i);
         }
-        outV.isRepeating = false;
       } else {
+        // Set isNull before calls in case they change their mind.
+        Arrays.fill(outputIsNull, 0, n, false);
         for(int i = 0; i != n; i++) {
           func(outV, inV, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
+    } else /* there are nulls in the inputColVector */ {

-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
+      // Carefully handle NULLs...
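
Note the two ways the no-nulls path clears isNull: per-row when a selection vector is in use (only selected offsets may be touched), and one bulk Arrays.fill(outputIsNull, 0, n, false) otherwise, which keeps the hot loop store-only. A sketch of both shapes with plain arrays:

import java.util.Arrays;

class BulkClearIsNull {
  public static void main(String[] args) {
    int n = 8;
    // Stale flags left over from a previous batch:
    boolean[] outputIsNull = {true, true, false, true, false, false, true, true};
    long[] vector = {1, 2, 3, 4, 5, 6, 7, 8};
    long[] outputVector = new long[n];

    // No selection vector: clear [0, n) in one call, then run a store-only loop.
    Arrays.fill(outputIsNull, 0, n, false);
    for (int i = 0; i != n; i++) {
      outputVector[i] = vector[i] * 2;
    }

    // With a selection vector, only selected rows may be written,
    // so isNull has to be cleared row by row inside the loop.
    int[] sel = {1, 4, 6};
    for (int j = 0; j != sel.length; j++) {
      int i = sel[j];
      outputIsNull[i] = false;
      outputVector[i] = vector[i] * 2;
    }
  }
}
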
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -101,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -109,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java index f9b3f76..d72c7d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java @@ -64,39 +64,136 @@ public void evaluate(VectorizedRowBatch batch) { return; } - arg2ColVector.flatten(batch.selectedInUse, sel, n); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + /* + * Repeating IF expression? + */ if (arg1ColVector.isRepeating) { - if (!null1[0] && vector1[0] == 1) { - outputColVector.setElement(0, 0, arg2ColVector); + if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { + outputColVector.isRepeating = true; outputColVector.noNulls = false; isNull[0] = true; } return; } - if (batch.selectedInUse) { - for (int j = 0; j < n; j++) { - int i = sel[j]; - if (!null1[0] && vector1[i] == 1) { - outputColVector.setElement(i, i, arg2ColVector); + + if (arg1ColVector.noNulls) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + /* + * Repeating THEN expression? + */ + if (arg2ColVector.isRepeating) { + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, 0, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + for (int i = 0; i < n; i++) { + if (vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, 0, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } + } + } else { + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } } else { - outputColVector.noNulls = false; - isNull[i] = true; + for (int i = 0; i < n; i++) { + if (vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } } } - } else { - for (int i = 0; i < n; i++) { - if (!null1[0] && vector1[i] == 1) { - outputColVector.setElement(i, i, arg2ColVector); + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + /* + * Repeating THEN expression? 
+ */ + if (arg2ColVector.isRepeating) { + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (!null1[i] && vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, 0, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } } else { - outputColVector.noNulls = false; - isNull[i] = true; + for (int i = 0; i < n; i++) { + if (!null1[i] && vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, 0, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } + } + } else { + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (!null1[i] && vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + for (int i = 0; i < n; i++) { + if (!null1[i] && vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } } } } - - arg2ColVector.unFlatten(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java index e7d4e4d..2c8a0b4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java @@ -65,9 +65,8 @@ public void evaluate(VectorizedRowBatch batch) { DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + int n = batch.size; long[] vector1 = arg1ColVector.vector; double[] vector2 = arg2ColVector.vector; double[] vector3 = arg3ColVector.vector; @@ -78,6 +77,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + /* All the code paths below propagate nulls even if neither arg2 nor arg3 * have nulls. This is to reduce the number of code paths and shorten the * code, at the expense of maybe doing unnecessary work if neither input @@ -85,7 +87,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -98,6 +100,15 @@ public void evaluate(VectorizedRowBatch batch) { arg3ColVector.flatten(batch.selectedInUse, sel, n); if (arg1ColVector.noNulls) { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -112,7 +123,16 @@ public void evaluate(VectorizedRowBatch batch) { arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java index fa7b2da..8c1cd90 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java @@ -65,8 +65,7 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -75,6 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + /* All the code paths below propagate nulls even if neither arg2 nor arg3 * have nulls. This is to reduce the number of code paths and shorten the * code, at the expense of maybe doing unnecessary work if neither input @@ -82,7 +84,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -95,6 +97,15 @@ public void evaluate(VectorizedRowBatch batch) { arg3ColVector.flatten(batch.selectedInUse, sel, n); if (arg1ColVector.noNulls) { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -110,6 +121,15 @@ public void evaluate(VectorizedRowBatch batch) { } } } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java index 487fb97..905ffba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; @@ -67,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -78,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { outputColVector.fill(arg3Scalar); @@ -94,14 +98,25 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar); } } } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java index 7b18cf8..feab24a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; @@ -67,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -78,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -96,14 +100,25 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i)); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i)); } } } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java index 0ba6722..7d7184a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java @@ -68,8 +68,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. 
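
Every repeating-condition branch now tests (arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1, so a NULL condition falls through to the ELSE value, matching SQL's IF(NULL, a, b) = b. The old vector1[0] == 1 test could take the THEN branch whenever a NULL row's garbage payload happened to hold 1. The guard in isolation:

class NullSafeIfCondition {
  // True only when the boolean column value is known (not NULL) and equals 1.
  static boolean condTrue(boolean noNulls, boolean isNull0, long v0) {
    return (noNulls || !isNull0) && v0 == 1;
  }

  public static void main(String[] args) {
    System.out.println(condTrue(true,  false, 1)); // true  -> THEN branch
    System.out.println(condTrue(false, true,  1)); // false -> NULL condition takes ELSE
    System.out.println(condTrue(false, false, 0)); // false -> ELSE branch
  }
}
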
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -79,23 +81,37 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { outputColVector.fill(arg3Scalar); } - } else if (arg1ColVector.noNulls) { + return; + } + + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); } } } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java index 0c8a2f6..d8ec895 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java @@ -64,8 +64,10 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; long[] vector2 = arg2ColVector.vector; @@ -96,6 +98,9 @@ public void evaluate(VectorizedRowBatch batch) { arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java index 85c37f9..3cb86e5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java @@ -64,39 +64,132 @@ public void evaluate(VectorizedRowBatch batch) { return; } - arg2ColVector.flatten(batch.selectedInUse, sel, n); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + /* + * Repeating IF expression? 
+   */
   if (arg1ColVector.isRepeating) {
-    if (!null1[0] && vector1[0] == 1) {
+    if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) {
+      outputColVector.isRepeating = true;
       outputColVector.noNulls = false;
       isNull[0] = true;
     } else {
-      outputColVector.setElement(0, 0, arg2ColVector);
+      arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
     }
     return;
   }
-  if (batch.selectedInUse) {
-    for (int j = 0; j < n; j++) {
-      int i = sel[j];
-      if (!null1[0] && vector1[i] == 1) {
-        outputColVector.noNulls = false;
-        isNull[i] = true;
+
+  /*
+   * Do careful maintenance of the outputColVector.noNulls flag.
+   */
+
+  if (arg1ColVector.noNulls) {
+
+    /*
+     * Repeating ELSE expression?
+     */
+    if (arg2ColVector.isRepeating) {
+      if (batch.selectedInUse) {
+        for (int j = 0; j < n; j++) {
+          int i = sel[j];
+          if (vector1[i] == 1) {
+            isNull[i] = true;
+            outputColVector.noNulls = false;
+          } else {
+            isNull[i] = false;
+            outputColVector.setElement(i, 0, arg2ColVector);
+          }
+        }
       } else {
-        outputColVector.setElement(i, i, arg2ColVector);
+        for (int i = 0; i < n; i++) {
+          if (vector1[i] == 1) {
+            isNull[i] = true;
+            outputColVector.noNulls = false;
+          } else {
+            isNull[i] = false;
+            outputColVector.setElement(i, 0, arg2ColVector);
+          }
+        }
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for (int j = 0; j < n; j++) {
+          int i = sel[j];
+          if (vector1[i] == 1) {
+            isNull[i] = true;
+            outputColVector.noNulls = false;
+          } else {
+            isNull[i] = false;
+            outputColVector.setElement(i, i, arg2ColVector);
+          }
         }
+      } else {
+        for (int i = 0; i < n; i++) {
+          if (vector1[i] == 1) {
+            isNull[i] = true;
+            outputColVector.noNulls = false;
+          } else {
+            isNull[i] = false;
+            outputColVector.setElement(i, i, arg2ColVector);
+          }
+        }
+      }
     }
   } else {
-    for (int i = 0; i < n; i++) {
-      if (!null1[0] && vector1[i] == 1) {
-        outputColVector.noNulls = false;
-        isNull[i] = true;
+
+    /*
+     * Repeating ELSE expression?
+     */
+    if (arg2ColVector.isRepeating) {
+      if (batch.selectedInUse) {
+        for (int j = 0; j < n; j++) {
+          int i = sel[j];
+          if (!null1[i] && vector1[i] == 1) {
+            isNull[i] = true;
+            outputColVector.noNulls = false;
+          } else {
+            isNull[i] = false;
+            outputColVector.setElement(i, 0, arg2ColVector);
+          }
+        }
       } else {
-        outputColVector.setElement(i, i, arg2ColVector);
+        for (int i = 0; i < n; i++) {
+          if (!null1[i] && vector1[i] == 1) {
+            isNull[i] = true;
+            outputColVector.noNulls = false;
+          } else {
+            isNull[i] = false;
+            outputColVector.setElement(i, 0, arg2ColVector);
+          }
+        }
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for (int j = 0; j < n; j++) {
+          int i = sel[j];
+          if (!null1[i] && vector1[i] == 1) {
+            isNull[i] = true;
+            outputColVector.noNulls = false;
+          } else {
+            isNull[i] = false;
+            outputColVector.setElement(i, i, arg2ColVector);
+          }
+        }
+      } else {
+        for (int i = 0; i < n; i++) {
+          if (!null1[i] && vector1[i] == 1) {
+            isNull[i] = true;
+            outputColVector.noNulls = false;
+          } else {
+            isNull[i] = false;
+            outputColVector.setElement(i, i, arg2ColVector);
+          }
+        }
       }
     }
   }
-
-  arg2ColVector.unFlatten();
 }

 @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java
new file mode 100644
index 0000000..5a68cec
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +public class IfExprNullNull extends VectorExpression { + + private static final long serialVersionUID = 1L; + + public IfExprNullNull(int outputColumnNum) { + super(outputColumnNum); + } + + public IfExprNullNull() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + final ColumnVector outputColVector = batch.cols[outputColumnNum]; + + // We do not need to do a column reset since we are carefully changing the output. + + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; + } + + @Override + public String vectorExpressionParameters() { + return "null, null"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + throw new UnsupportedOperationException("Undefined descriptor"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java index 09aa9ab..deb4606 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java @@ -67,8 +67,15 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -98,6 +105,11 @@ public void evaluate(VectorizedRowBatch batch) { arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. 
+ */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java index 9167178..3cd3755 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java @@ -69,8 +69,15 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -99,6 +106,11 @@ public void evaluate(VectorizedRowBatch batch) { // extend any repeating values and noNulls indicator in the inputs arg2ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java index 84d0052..51b03dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java @@ -70,8 +70,15 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -100,6 +107,11 @@ public void evaluate(VectorizedRowBatch batch) { // extend any repeating values and noNulls indicator in the input arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. 
+ */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java index 5ed457b..9c0e7be 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -67,8 +68,11 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - outputColVector.noNulls = true; // output must be a scalar and neither one is null - outputColVector.isRepeating = false; // may override later + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -80,11 +84,12 @@ public void evaluate(VectorizedRowBatch batch) { outputColVector.initBuffer(); if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.setRef(0, arg2Scalar, 0, arg2Scalar.length); } else { - outputColVector.fill(arg3Scalar); + outputColVector.setRef(0, arg3Scalar, 0, arg3Scalar.length); } + outputColVector.isRepeating = true; return; } @@ -92,6 +97,7 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; if (vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { @@ -99,6 +105,7 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { if (vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); @@ -111,6 +118,7 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; if (!arg1ColVector.isNull[i] && vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { @@ -118,6 +126,7 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { if (!arg1ColVector.isNull[i] && vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java index ee3cd19..3c83566 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java @@ -17,6 +17,8 @@ */ package 
org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -64,8 +66,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -81,7 +85,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -93,6 +97,11 @@ public void evaluate(VectorizedRowBatch batch) { arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java index b98ddbe..c0cb2c1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java @@ -19,13 +19,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** * Compute IF(expr1, expr2, expr3) for 3 input column expressions. @@ -70,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. 
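
The "since we always set a value, make sure all isNull entries are set to false" comments exist because scratch columns are reused across batches: a branch that writes every value but skips isNull can resurrect a NULL flag left by an earlier batch. A minimal reproduction of that hazard:

class StaleIsNullHazard {
  public static void main(String[] args) {
    boolean[] isNull = new boolean[4];
    long[] vector = new long[4];

    // Batch 1 produced a NULL in row 2.
    isNull[2] = true;

    // Batch 2 writes a value to every row but forgets to clear isNull:
    for (int i = 0; i < 4; i++) {
      vector[i] = 42;
      // missing: isNull[i] = false;
    }
    System.out.println("row 2 still looks NULL: " + isNull[2]); // true -> wrong result
  }
}
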
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -81,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { outputColVector.fill(arg3Scalar); @@ -93,13 +94,19 @@ public void evaluate(VectorizedRowBatch batch) { // reduce the number of code paths needed below. arg2ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Since we always set a value, make sure all isNull entries are set to false. + */ + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3Scalar); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3Scalar); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java index abd585d..823b87c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java @@ -69,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -80,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -98,6 +100,7 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i)); } } else { @@ -106,6 +109,12 @@ public void evaluate(VectorizedRowBatch batch) { } } } else /* there are nulls */ { + + /* + * Do careful maintenance of NULLs. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java index 24299e9..1649237 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java @@ -68,8 +68,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -79,18 +81,27 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { outputColVector.fill(arg3Scalar); } - } else if (arg1ColVector.noNulls) { + return; + } + + /* + * Since we always set a value, make sure all isNull entries are set to false. + */ + + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); } @@ -99,16 +110,16 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2Scalar : arg3Scalar); - outputIsNull[i] = false; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? 
arg2Scalar : arg3Scalar); } - Arrays.fill(outputIsNull, 0, n, false); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java index 6b141d1..a5cddc6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -52,35 +54,45 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector inputColVector = batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; int n = batch.size; - long[] outputVector = ((LongColumnVector) batch.cols[outputColumnNum]).vector; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + boolean[] outputIsNull = outputColVector.isNull; if (n <= 0) { // Nothing to do return; } - // output never has nulls for this operator - batch.cols[outputColumnNum].noNulls = true; - if (inputColVector.noNulls) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.noNulls) { + outputColVector.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = 1; - batch.cols[outputColumnNum].isRepeating = true; } else if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Selection property will not change. - outputVector[0] = nullPos[0] ? 0 : 1; - batch.cols[outputColumnNum].isRepeating = true; + outputColVector.isRepeating = true; + outputIsNull[0] = false; + outputVector[0] = inputIsNull[0] ? 0 : 1; } else { - batch.cols[outputColumnNum].isRepeating = false; + + /* + * Since we have a result for all rows, we don't need to do conditional NULL maintenance or + * turn off noNulls.. + */ + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = nullPos[i] ? 0 : 1; + outputIsNull[i] = false; + outputVector[i] = inputIsNull[i] ? 0 : 1; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { - outputVector[i] = nullPos[i] ? 0 : 1; + outputVector[i] = inputIsNull[i] ? 
0 : 1; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java index 7347800..17d567f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -52,34 +54,47 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector inputColVector = batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; int n = batch.size; - long[] outputVector = ((LongColumnVector) batch.cols[outputColumnNum]).vector; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + boolean[] outputIsNull = outputColVector.isNull; + if (n <= 0) { // Nothing to do, this is EOF return; } - // output never has nulls for this operator - batch.cols[outputColumnNum].noNulls = true; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.noNulls) { + outputColVector.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = 0; - batch.cols[outputColumnNum].isRepeating = true; } else if (inputColVector.isRepeating) { - outputVector[0] = nullPos[0] ? 1 : 0; - batch.cols[outputColumnNum].isRepeating = true; + outputColVector.isRepeating = true; + outputIsNull[0] = false; + outputVector[0] = inputIsNull[0] ? 1 : 0; } else { + + /* + * Since we have a result for all rows, we don't need to do conditional NULL maintenance or + * turn off noNulls.. + */ + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = nullPos[i] ? 1 : 0; + outputIsNull[i] = false; + outputVector[i] = inputIsNull[i] ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { - outputVector[i] = nullPos[i] ? 1 : 0; + outputVector[i] = inputIsNull[i] ? 1 : 0; } } - batch.cols[outputColumnNum].isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java index dfe3bd1..9d22a3c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java @@ -56,7 +56,9 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector indexColumnVector = (LongColumnVector) batch.cols[indexColumnNum]; long[] indexV = indexColumnVector.vector; - outV.noNulls = true; + // We do not need to do a column reset since we are carefully changing the output. 
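
The removed blanket assignments like outV.noNulls = true are the mirror-image hazard: once noNulls is asserted, readers ignore isNull[] entirely, so any row the expression later nulls out (a NULL list element, for instance) is silently reported as non-null. The safe protocol throughout this patch is to only ever turn noNulls off, at the moment a NULL is actually written. Illustration:

class NoNullsFlagHazard {
  public static void main(String[] args) {
    boolean noNulls = true;                 // asserted up front, pre-patch style
    boolean[] isNull = new boolean[4];

    // Later, one row turns out to be NULL (e.g. indexing a NULL list):
    isNull[2] = true;
    // missing: noNulls = false;

    // A reader honoring the contract ignores isNull[] while noNulls is true:
    for (int i = 0; i < 4; i++) {
      boolean rowIsNull = !noNulls && isNull[i];
      System.out.println("row " + i + " null=" + rowIsNull); // row 2 reported non-null
    }
  }
}
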
+ outV.isRepeating = false; + if (listV.isRepeating) { if (listV.isNull[0]) { outV.isNull[0] = true; @@ -68,8 +70,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[0] = true; outV.noNulls = false; } else { - outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV); outV.isNull[0] = false; + outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV); } outV.isRepeating = true; } else { @@ -79,11 +81,11 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[j] = true; outV.noNulls = false; } else { - outV.setElement(j, (int) (listV.offsets[0] + indexV[j]), childV); outV.isNull[j] = false; + outV.setElement(j, (int) (listV.offsets[0] + indexV[j]), childV); + } } - outV.isRepeating = false; } } } else { @@ -93,11 +95,10 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[j] = true; outV.noNulls = false; } else { - outV.setElement(j, (int) (listV.offsets[j] + indexV[j]), childV); outV.isNull[j] = false; + outV.setElement(j, (int) (listV.offsets[j] + indexV[j]), childV); } } - outV.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java index 62860df..948652a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java @@ -53,7 +53,10 @@ public void evaluate(VectorizedRowBatch batch) { ListColumnVector listV = (ListColumnVector) batch.cols[listColumnNum]; ColumnVector childV = listV.child; - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (listV.isRepeating) { if (listV.isNull[0]) { outV.isNull[0] = true; @@ -63,8 +66,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[0] = true; outV.noNulls = false; } else { - outV.setElement(0, (int) (listV.offsets[0] + index), childV); outV.isNull[0] = false; + outV.setElement(0, (int) (listV.offsets[0] + index), childV); } } outV.isRepeating = true; @@ -75,8 +78,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[j] = true; outV.noNulls = false; } else { - outV.setElement(j, (int) (listV.offsets[j] + index), childV); outV.isNull[j] = false; + outV.setElement(j, (int) (listV.offsets[j] + index), childV); } } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java index c2f7143..42483c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java @@ -69,9 +69,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java index 0991bda..75adda8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java @@ -60,7 +60,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java index 2d66cee..608c32a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. */ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. 
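
The comment above is the key invariant behind propagateNullsColCol: the arithmetic loops run over null lanes too, so after null propagation the null lanes' data values are rewritten to 1 (long) or NaN (double). Any later expression that consumes this output, such as col2 / (col1 - 1), then divides by a benign placeholder instead of possibly zero. A sketch of the convention with plain arrays (values on null lanes are discarded anyway):

class NullLaneConvention {
  public static void main(String[] args) {
    int n = 4;
    // Lane 2 is NULL; its payload (1) is garbage but still flows through the math.
    long[] col1 = {3, 7, 1, 5};
    boolean[] isNull = {false, false, true, false};

    // Intermediate result col1 - 1, computed branch-free on every lane:
    long[] minus1 = new long[n];
    for (int i = 0; i != n; i++) {
      minus1[i] = col1[i] - 1;              // lane 2 is now 0!
    }
    // The convention: set null lanes of a long output to 1 (NaN for a double)...
    for (int i = 0; i != n; i++) {
      if (isNull[i]) {
        minus1[i] = 1;
      }
    }
    // ...so a downstream col2 / (col1 - 1) never divides by zero on a null lane.
    long[] col2 = {6, 12, 9, 20};
    long[] div = new long[n];
    for (int i = 0; i != n; i++) {
      div[i] = col2[i] / minus1[i];         // lane 2 divides by 1; value is ignored
    }
    System.out.println(java.util.Arrays.toString(div));
  }
}
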
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java index 242fddc..789a01d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -25,8 +27,8 @@ public class LongColEqualLongScalar extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -45,6 +47,12 @@ public LongColEqualLongScalar() { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -52,55 +60,82 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] == value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector[0] == value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] == value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] == value ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" - outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] == value ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] == value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] == value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java index dc1a331..eb040ca 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. */ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. 
This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java index 633015e..7f622b4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long value; + protected int colNum; + protected long value; public LongColGreaterEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] >= value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] >= value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" - outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] >= value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] >= value ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] >= value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] >= value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java index e56d800..9ab9e1e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. */ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. 
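The scalar comparison hunks in this patch keep their inner loops branch-free by using bit tricks such as "(((a - b) ^ (b - a)) >>> 63) ^ 1" for equality. The six identities are easy to sanity-check in isolation; they assume the subtractions do not overflow 64 bits (a - b == Long.MIN_VALUE would break the equality form). A self-contained check in plain Java:

// Verifies the six branch-free comparison forms quoted in the hunks above
// against the ordinary comparison operators, on sample values for which the
// subtractions cannot overflow.
public class BranchFreeCompareCheck {
  private static void check(boolean ok) {
    if (!ok) throw new AssertionError("identity violated");
  }

  public static void main(String[] args) {
    long[] samples = {-7, -1, 0, 1, 2, 5, 100};
    for (long a : samples) {
      for (long b : samples) {
        check(((((a - b) ^ (b - a)) >>> 63) ^ 1) == (a == b ? 1 : 0)); // a == b
        check((((a - b) ^ (b - a)) >>> 63) == (a != b ? 1 : 0));       // a != b
        check(((a - b) >>> 63) == (a < b ? 1 : 0));                    // a <  b
        check((((b - a) >>> 63) ^ 1) == (a <= b ? 1 : 0));             // a <= b
        check(((b - a) >>> 63) == (a > b ? 1 : 0));                    // a >  b
        check((((a - b) >>> 63) ^ 1) == (a >= b ? 1 : 0));             // a >= b
      }
    }
    System.out.println("all six branch-free comparison forms agree");
  }
}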
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java index 25c07df..eec3b89 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -25,8 +27,8 @@ public class LongColGreaterLongScalar extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColGreaterLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -52,8 +54,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -63,44 +65,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] > value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] > value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a > b" is "(b - a) >>> 63" - outputVector[i] = (value - vector[i]) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] > value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] > value ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] > value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] > value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (value - vector[i]) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java index f052675..004bf4f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. */ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. 
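The recurring "Carefully handle NULLs..." blocks exist because the output column can be a scratch column reused across batches: even when the input has no nulls, stale isNull entries left over from an earlier batch would leak through as phantom NULLs unless every row written in this batch also clears its isNull entry, and outputColVector.noNulls is deliberately left alone because only the first n entries get cleaned. A toy illustration of the hazard, with plain arrays standing in for the column-vector state (names assumed, not the Hive classes):

// Demonstrates why the hunks above clear outputIsNull for each written row
// instead of trusting a no-null input.
public class ScratchReuseSketch {
  public static void main(String[] args) {
    // A scratch output column sized for the maximum batch, reused across batches.
    long[] outputVector = new long[8];
    boolean[] outputIsNull = new boolean[8];
    boolean outputNoNulls = true;

    // Batch 1 (size 4) produced a NULL in row 2, leaving state behind.
    outputIsNull[2] = true;
    outputNoNulls = false;

    // Batch 2 (size 4): input has no nulls. Writing data values alone would
    // leave outputIsNull[2] == true -- a phantom NULL -- so each written row
    // clears its isNull entry, mirroring the pattern in the hunks above.
    long[] vector = {10, 20, 30, 40};
    long value = 25;
    int n = 4;
    for (int i = 0; i != n; i++) {
      outputIsNull[i] = false;                      // clear the stale entry
      outputVector[i] = vector[i] >= value ? 1 : 0;
    }
    // noNulls stays false: rows beyond n were never cleared, so the
    // whole-column claim "no nulls anywhere" cannot safely be restored here.
    System.out.println("noNulls=" + outputNoNulls + ", rows="
        + java.util.Arrays.toString(java.util.Arrays.copyOf(outputVector, n)));
    // prints: noNulls=false, rows=[0, 0, 1, 1]
  }
}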
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java index 1e5b349..d2826c2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long value; + protected int colNum; + protected long value; public LongColLessEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] <= value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] <= value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" - outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] <= value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] <= value ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] <= value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] <= value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java index fe700c3..3a3425b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java @@ -66,12 +66,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java index 2f282a9..a7a9965 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColLessLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] < value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] < value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a < b" is "(a - b) >>> 63" - outputVector[i] = (vector[i] - value) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] < value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] < value ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] < value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] < value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (vector[i] - value) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java index 19fc3a6..cfd61a9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java @@ -71,12 +71,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java index 8307e78..833b8fa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. 
*/ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java index 0e78f8d..0e76019 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColNotEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] != value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] != value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" - outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] != value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] != value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] != value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] != value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java index 6c5bb68..d7be120 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java @@ -64,8 +64,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -75,49 +75,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inSet.lookup(vector[i]) ?
1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java index 7cdce0b..2a5b0b5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java @@ -60,7 +60,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java index 8d915c2..b767a54 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need 
to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] == value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value == vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value == vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value == vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" - outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value == vector[0] ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value == vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value == vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java index a06fb08..0279cc1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -25,8 +27,8 @@ public class LongScalarGreaterEqualLongColumn extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarGreaterEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -52,8 +54,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -63,44 +65,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value >= vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value >= vector[i] ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" - outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value >= vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value >= vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value >= vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value >= vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java index 6610288..4a0a376 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long value; + protected int colNum; + protected long value; public LongScalarGreaterLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value > vector[0] ? 
1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value > vector[i] ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a > b" is "(b - a) >>> 63" - outputVector[i] = (vector[i] - value) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value > vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value > vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value > vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value > vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (vector[i] - value) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java index 7a305d3..eb8973f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarLessEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value <= vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value <= vector[i] ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" - outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value <= vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value <= vector[i] ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value <= vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value <= vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java index 763dfdf..0b71893 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarLessLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value < vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value < vector[i] ? 
1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a < b" is "(a - b) >>> 63" - outputVector[i] = (value - vector[i]) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value < vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value < vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value < vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value < vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (value - vector[i]) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java index aecaed2..f5180a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarNotEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value != vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value != vector[i] ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" - outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value != vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value != vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value != vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value != vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java index c52e337..a9e2691 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java @@ -65,35 +65,47 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before the call in case it changes its mind. + outputIsNull[0] = false; func(outV, vector, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; func(outV, vector, i); } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outputIsNull[i] = false; func(outV, vector, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... // Handle case with nulls. Don't do function if the value is null, // because the data may be undefined for a null value.
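+ // The data slot of a null lane is not required to hold a meaningful value, so func() is only invoked for lanes whose isNull entry is false.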
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, vector, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; @@ -109,7 +121,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java index ccc0fcb..22321ca 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -64,7 +66,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -74,38 +75,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { - outputVector[0] = func(vector[0]); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = func(vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + cleanup(outputColVector, sel, batch.selectedInUse, n); + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = func(vector[i]); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
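+ // func() is still applied to null lanes here; the copied isNull flags mask those results, which keeps conditional checks out of the inner loop.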
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = func(vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = func(vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } cleanup(outputColVector, sel, batch.selectedInUse, n); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java index 3375a56..3625a36 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -63,7 +65,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -73,38 +74,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { - outputVector[0] = func(vector[0]); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = func(vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + cleanup(outputColVector, sel, batch.selectedInUse, n); + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = func(vector[i]); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = func(vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = func(vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } cleanup(outputColVector, sel, batch.selectedInUse, n); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java index 898cf96..bd756a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -62,7 +64,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -72,38 +73,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { - outputVector[0] = func(vector[0]); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = func(vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = func(vector[i]); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } } outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = func(vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = func(vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java index 30f20f3..be69f7f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -54,45 +56,61 @@ public void evaluate(VectorizedRowBatch batch) { long[] vector = inputColVector.vector; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; long[] outputVector = outV.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; if (n <= 0) { // Nothing to do, this is EOF return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before the call in case it changes its mind. + outputIsNull[0] = false; // 0 XOR 1 yields 1, 1 XOR 1 yields 0 outputVector[0] = vector[0] ^ 1; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = false; outputVector[i] = vector[i] ^ 1; } - outV.isRepeating = false; } else { + Arrays.fill(outV.isNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector[i] ^ 1; } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop.
+ */ outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outputVector[0] = vector[0] ^ 1; - outV.isNull[0] = inputColVector.isNull[0]; - } else if (batch.selectedInUse) { - outV.isRepeating = false; + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; } } else { - outV.isRepeating = false; for (int i = 0; i != n; i++) { outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java index eaaade6..3c18853 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java @@ -284,25 +284,56 @@ public static void setNullAndDivBy0DataEntriesLong( } /* - * Propagate null values for a two-input operator. + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. */ public static void propagateNullsColCol(ColumnVector inputColVector1, ColumnVector inputColVector2, ColumnVector outputColVector, int[] sel, int n, boolean selectedInUse) { - outputColVector.noNulls = inputColVector1.noNulls && inputColVector2.noNulls; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - if (outputColVector.noNulls) { - // the inputs might not always have isNull initialized for - // inputColVector1.isNull[i] || inputColVector2.isNull[i] to be valid - Arrays.fill(outputColVector.isNull, false); - return; - } + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.isNull[0] = false; + outputColVector.isRepeating = true; + } else { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = false; + } + } else { + Arrays.fill(outputColVector.isNull, 0, n, false); + } + } + } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - if (inputColVector2.isRepeating) { - outputColVector.isNull[0] = inputColVector2.isNull[0]; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (!inputColVector2.isNull[0]) { + outputColVector.isNull[0] = false; + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + } else if (inputColVector2.isRepeating) { + if (!inputColVector2.isNull[0]) { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = false; + } + } else { + Arrays.fill(outputColVector.isNull, 0, n, false); + } + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; // Because every value will be NULL. 
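+ // Only entry 0 of a repeating output is read downstream, so the remaining isNull entries can be left untouched.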
+ } } else { + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -313,9 +344,32 @@ public static void propagateNullsColCol(ColumnVector inputColVector1, } } } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - if (inputColVector1.isRepeating) { - outputColVector.isNull[0] = inputColVector1.isNull[0]; + + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (!inputColVector1.isNull[0]) { + outputColVector.isNull[0] = false; + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + } else if (inputColVector1.isRepeating) { + if (!inputColVector1.isNull[0]) { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = false; + } + } else { + Arrays.fill(outputColVector.isNull, 0, n, false); + } + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; // Because every value will be NULL. + } } else { + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -326,18 +380,23 @@ public static void propagateNullsColCol(ColumnVector inputColVector1, } } } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputColVector.isNull[0] = inputColVector1.isNull[0] || inputColVector2.isNull[0]; - if (outputColVector.isNull[0]) { - outputColVector.isRepeating = true; - return; + if (!inputColVector1.isNull[0] && !inputColVector2.isNull[0]) { + outputColVector.isNull[0] = false; + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; } + outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { + if (inputColVector1.isNull[0]) { outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; // because every value will be NULL - return; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; // Because every value will be NULL. } else { + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -352,9 +411,10 @@ public static void propagateNullsColCol(ColumnVector inputColVector1, } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (inputColVector2.isNull[0]) { outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; // because every value will be NULL - return; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; // Because every value will be NULL. } else { + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -366,6 +426,7 @@ public static void propagateNullsColCol(ColumnVector inputColVector1, } } } else { // neither side is repeating + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java index bfd7334..6450a0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java @@ -60,8 +60,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; resultLen[0] = length[0]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java index 20a0a37..db684c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java @@ -15,10 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; import org.apache.hadoop.hive.ql.exec.vector.expressions.AbstractFilterStringColLikeStringScalar.Checker; @@ -70,42 +71,50 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; long[] outputVector = outV.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; // return immediately if batch is empty if (n == 0) { return; } - outV.noNulls = inputColVector.noNulls; - outV.isRepeating = inputColVector.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before the call in case it changes its mind. + outputIsNull[0] = false; outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0); - outV.isNull[0] = false; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0); outV.isNull[i] = false; + outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0); } } else { + Arrays.fill(outV.isNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0); - outV.isNull[i] = false; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0); - outV.isNull[0] = false; - } else { - outputVector[0] = LongColumnVector.NULL_VALUE; - outV.isNull[0] = true; - } - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag.
+ */ + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos[i]) { @@ -114,6 +123,7 @@ public void evaluate(VectorizedRowBatch batch) { } else { outputVector[i] = LongColumnVector.NULL_VALUE; outV.isNull[i] = true; + outV.noNulls = false; } } } else { @@ -124,11 +134,12 @@ public void evaluate(VectorizedRowBatch batch) { } else { outputVector[i] = LongColumnVector.NULL_VALUE; outV.isNull[i] = true; + outV.noNulls = false; } } } } - } + } private Checker borrowChecker() { FilterStringColLikeStringScalar fil = new FilterStringColLikeStringScalar(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java index c889ac1..60040a5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java @@ -74,57 +74,65 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; int n = batch.size; byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] len = inputColVector.length; long[] outputVector = outputColVector.vector; + boolean[] outputIsNull = outputColVector.isNull; // return immediately if batch is empty if (n == 0) { return; } - outputColVector.isRepeating = inputColVector.isRepeating; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before the call in case it changes its mind. + outputIsNull[0] = false; outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { + } else /* there are nulls in the inputColVector */ { - // All must be selected otherwise size would be zero - // Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0; - } - outputColVector.isNull[0] = nullPos[0]; - } else if (batch.selectedInUse) { + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + outputColVector.isNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ?
1 : 0; } - } - outputColVector.isNull[i] = nullPos[i]; } } else { - System.arraycopy(nullPos, 0, outputColVector.isNull, 0, n); + System.arraycopy(inputIsNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java index f730c9d..ccd947c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -62,6 +63,8 @@ public void evaluate(VectorizedRowBatch batch) { byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] length = inputColVector.length; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { @@ -72,53 +75,60 @@ public void evaluate(VectorizedRowBatch batch) { // initialize output vector buffer to receive data outV.initBuffer(); - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before the call in case it changes its mind. + outputIsNull[0] = false; outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isRepeating = false; } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i != n; i++) { outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... /* * Handle case with nulls. Don't do function if the value is null, to save time, * because calling the function can be expensive.
*/ outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java index cbdcc76..b202b4e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java @@ -74,6 +74,11 @@ public void evaluate(VectorizedRowBatch batch) { // prepare output buffer to accept results outV.initBuffer(); + if (!outV.noNulls) { + // TEMPORARILY: + outV.reset(); + } + /* Handle default case for isRepeating setting for output. This will be set to true * later in the special cases where that is necessary. */ @@ -81,7 +86,7 @@ public void evaluate(VectorizedRowBatch batch) { if (inV1.noNulls && !inV2.noNulls) { - // propagate nulls + // Carefully handle NULLs... /* We'll assume that there *may* be nulls in the input if !noNulls is true * for an input vector. This is to be more forgiving of errors in loading @@ -89,6 +94,7 @@ public void evaluate(VectorizedRowBatch batch) { * isNull[0] is set if !noNulls and isRepeating are true for the vector. */ outV.noNulls = false; + if (inV2.isRepeating) { if (inV2.isNull[0]) { @@ -321,8 +327,9 @@ public void evaluate(VectorizedRowBatch batch) { } } else { // there are no nulls in either input vector - // propagate null information - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ // perform data operation if (inV1.isRepeating && inV2.isRepeating) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java index 9b9c063..e537f8c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -60,59 +62,64 @@ public void evaluate(VectorizedRowBatch batch) { int [] start = inputColVector.start; int [] length = inputColVector.length; long[] resultLen = outV.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { //Nothing to do return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before the call in case it changes its mind. + outputIsNull[0] = false; resultLen[0] = utf8StringLength(vector[0], start[0], length[0]); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = false; resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isRepeating = false; } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i != n; i++) { resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - /* - * Handle case with nulls. Don't do function if the value is null, to save time, - * because calling the function can be expensive. - */ + // Carefully handle NULLs...
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - resultLen[0] = utf8StringLength(vector[0], start[0], length[0]); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isNull[i] = inputColVector.isNull[i]; } outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java index 94fbef8..6f75a91 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -62,6 +63,8 @@ public void evaluate(VectorizedRowBatch batch) { byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] length = inputColVector.length; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { @@ -72,37 +75,46 @@ public void evaluate(VectorizedRowBatch batch) { // initialize output vector buffer to receive data outV.initBuffer(); - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before the call in case it changes its mind. + outputIsNull[0] = false; outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } - outV.isRepeating = false; } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i != n; i++) { outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... /* * Handle case with nulls. Don't do function if the value is null, to save time, * because calling the function can be expensive.
*/ outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; if (!inputColVector.isNull[i]) { @@ -110,7 +122,6 @@ public void evaluate(VectorizedRowBatch batch) { } outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { @@ -118,7 +129,6 @@ public void evaluate(VectorizedRowBatch batch) { } outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java index 5934f6f..de416a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java @@ -139,15 +139,17 @@ public void evaluate(VectorizedRowBatch batch) { int[] start = inV.start; outV.initBuffer(); + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.isRepeating) { - outV.isRepeating = true; if (!inV.noNulls && inV.isNull[0]) { outV.isNull[0] = true; outV.noNulls = false; outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); return; } else { - outV.noNulls = true; + outV.isNull[0] = false; int offset = getSubstrStartOffset(vector[0], start[0], len[0], startIdx); if (offset != -1) { outV.setVal(0, vector[0], offset, len[0] - (offset - start[0])); @@ -155,58 +157,55 @@ public void evaluate(VectorizedRowBatch batch) { outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } } - } else { - outV.isRepeating = false; - if (batch.selectedInUse) { - if (!inV.noNulls) { - outV.noNulls = false; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; - if (!inV.isNull[selected]) { - int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], - startIdx); - outV.isNull[selected] = false; - if (offset != -1) { - outV.setVal(selected, vector[selected], offset, - len[selected] - (offset - start[selected])); - } else { - outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } else { - outV.isNull[selected] = true; - } - } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; + outV.isRepeating = true; + return; + } + + if (batch.selectedInUse) { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outV.noNulls = false; + + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + if (!inV.isNull[selected]) { int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], startIdx); + outV.isNull[selected] = false; if (offset != -1) { outV.setVal(selected, vector[selected], offset, len[selected] - (offset - start[selected])); } else { outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outV.isNull[selected] = true; } } } else { - if (!inV.noNulls) { - outV.noNulls = false; - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - for (int i = 0; i != n; ++i) { - if (!inV.isNull[i]) { - int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); - if (offset != -1) { - outV.setVal(i, vector[i], offset, len[i] - (offset - start[i])); - } else { - outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + outV.isNull[selected] = false; + int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], + startIdx); + if (offset != -1) { + outV.setVal(selected, vector[selected], offset, + len[selected] - (offset - start[selected])); + } else { + outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { + } + } + } else { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outV.noNulls = false; + + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for (int i = 0; i != n; ++i) { + if (!inV.isNull[i]) { int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); if (offset != -1) { outV.setVal(i, vector[i], offset, len[i] - (offset - start[i])); @@ -215,6 +214,16 @@ public void evaluate(VectorizedRowBatch batch) { } } } + } else { + for (int i = 0; i != n; ++i) { + outV.isNull[i] = false; + int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); + if (offset != -1) { + outV.setVal(i, vector[i], offset, len[i] - (offset - start[i])); + } else { + outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); + } + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java index 9d6eccf..c9338ff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.UnsupportedEncodingException; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -160,15 +161,17 @@ public void evaluate(VectorizedRowBatch batch) { int[] start = inV.start; outV.initBuffer(); + // We do not need to do a column reset since we are carefully changing the output. 
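+ // "Carefully" means every branch below writes both the result bytes and the matching isNull entry, so stale state from a previous batch cannot leak through.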
+ outV.isRepeating = false; + if (inV.isRepeating) { - outV.isRepeating = true; + if (!inV.noNulls && inV.isNull[0]) { outV.isNull[0] = true; outV.noNulls = false; outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); - return; } else { - outV.noNulls = true; + outV.isNull[0] = false; populateSubstrOffsets(vector[0], start[0], len[0], startIdx, length, offsetArray); if (offsetArray[0] != -1) { outV.setVal(0, vector[0], offsetArray[0], offsetArray[1]); @@ -176,30 +179,19 @@ public void evaluate(VectorizedRowBatch batch) { outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } } - } else { - outV.isRepeating = false; - if (batch.selectedInUse) { - if (!inV.noNulls) { - outV.noNulls = false; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; - if (!inV.isNull[selected]) { - outV.isNull[selected] = false; - populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, - length, offsetArray); - if (offsetArray[0] != -1) { - outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]); - } else { - outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } else { - outV.isNull[selected] = true; - } - } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; + outV.isRepeating = true; + return; + } + + if (batch.selectedInUse) { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outV.noNulls = false; + + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + if (!inV.isNull[selected]) { outV.isNull[selected] = false; populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, length, offsetArray); @@ -208,26 +200,32 @@ public void evaluate(VectorizedRowBatch batch) { } else { outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outV.isNull[selected] = true; } } } else { - if (!inV.noNulls) { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - outV.noNulls = false; - for (int i = 0; i != n; ++i) { - if (!inV.isNull[i]) { - populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); - if (offsetArray[0] != -1) { - outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]); - } else { - outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + outV.isNull[selected] = false; + populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, + length, offsetArray); + if (offsetArray[0] != -1) { + outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]); + } else { + outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { - outV.isNull[i] = false; + } + } + } else { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
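+ // The input null flags are copied wholesale below; the per-lane check still skips the substring work for null lanes.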
+ outV.noNulls = false; + + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for (int i = 0; i != n; ++i) { + if (!inV.isNull[i]) { populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); if (offsetArray[0] != -1) { outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]); @@ -236,6 +234,16 @@ public void evaluate(VectorizedRowBatch batch) { } } } + } else { + Arrays.fill(outV.isNull, 0, n, false); + for (int i = 0; i != n; ++i) { + populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); + if (offsetArray[0] != -1) { + outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]); + } else { + outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); + } + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 544b700..894bdae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -72,6 +72,8 @@ public void evaluate(VectorizedRowBatch batch) { int [] start = inputColVector.start; int [] length = inputColVector.length; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; outV.initBuffer(); Text t; @@ -86,26 +88,37 @@ public void evaluate(VectorizedRowBatch batch) { // It's implemented in the simplest way now, just calling the // existing built-in function. - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before the call in case it changes its mind. + outputIsNull[0] = false; s.set(vector[0], start[0], length[0]); t = func.evaluate(s); setString(outV, 0, t); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; /* Fill output isNull with false for selected elements since there is a chance we'll * convert to noNulls == false in setString(); */ - outV.isNull[i] = false; + outputIsNull[i] = false; s.set(vector[i], start[i], length[i]); t = func.evaluate(s); setString(outV, i, t); } - outV.isRepeating = false; } else { // Set all elements to not null. The setString call can override this. @@ -115,21 +128,13 @@ public void evaluate(VectorizedRowBatch batch) { t = func.evaluate(s); setString(outV, i, t); } - outV.isRepeating = false; } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs...
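+ // setString() can itself mark a lane null (for instance when the UDF returns null), which is why each isNull entry is written before the call rather than after.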
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; // setString can override this - if (!inputColVector.isNull[0]) { - s.set(vector[0], start[0], length[0]); - t = func.evaluate(s); - setString(outV, 0, t); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; // setString can override this @@ -139,7 +144,6 @@ public void evaluate(VectorizedRowBatch batch) { setString(outV, i, t); } } - outV.isRepeating = false; } else { // setString can override this null propagation @@ -151,7 +155,6 @@ public void evaluate(VectorizedRowBatch batch) { setString(outV, i, t); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java index 2f8b627..6cd003a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -60,6 +62,8 @@ public void evaluate(VectorizedRowBatch batch) { int start[] = inputColVector.start; int length[] = inputColVector.length; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; outV.initBuffer(); if (n == 0) { @@ -67,35 +71,43 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, vector, start, length, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, vector, start, length, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, vector, start, length, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs...
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, vector, start, length, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; @@ -103,7 +115,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, start, length, i); } } - outV.isRepeating = false; } else { System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -111,7 +122,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, start, length, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java index 7fb95f5..bc50a7d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java @@ -73,8 +73,8 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -83,49 +83,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs...
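+ // e.g. (hypothetical): inSet = {t1} over input {t1, null, t2} gives
+ // outputVector = {1, unchanged, 0} with outputIsNull = {false, true, false};
+ // null rows skip the set lookup entirely.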
+ outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java index 5eb2090..f1be8c7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,6 +59,8 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; outV.initBuffer(); if (n == 0) { @@ -64,35 +68,43 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, inputColVector, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, inputColVector, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inputColVector, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs...
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, inputColVector, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; @@ -108,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inputColVector, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java index ea78a2e..1e36048 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java @@ -57,13 +57,13 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; - ColumnVector outputVector = batch.cols[outputColumnNum]; + ColumnVector outputColVector = batch.cols[outputColumnNum]; if (n <= 0) { // Nothing to do return; } - outputVector.init(); + outputColVector.init(); boolean noNulls = false; @@ -74,44 +74,52 @@ public void evaluate(VectorizedRowBatch batch) { noNulls = noNulls || cv.noNulls; } - outputVector.noNulls = noNulls; - outputVector.isRepeating = false; + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + + outputColVector.isRepeating = false; + + // Carefully handle NULLs... + // TEMPORARILY: Assume the worst... + outputColVector.noNulls = false; ColumnVector first = batch.cols[inputColumns[0]]; if (first.noNulls && first.isRepeating) { - outputVector.isRepeating = true; - outputVector.isNull[0] = false; - outputVector.setElement(0, 0, first); + outputColVector.isRepeating = true; + outputColVector.isNull[0] = false; + outputColVector.setElement(0, 0, first); } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector.isNull[i] = true; + outputColVector.isNull[i] = true; for (int k = 0; k < inputColumns.length; k++) { ColumnVector cv = batch.cols[inputColumns[k]]; if ( (cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, 0, cv); + outputColVector.isNull[i] = false; + outputColVector.setElement(i, 0, cv); break; } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, i, cv); + outputColVector.isNull[i] = false; + outputColVector.setElement(i, i, cv); break; } } } } else { for (int i = 0; i != n; i++) { - outputVector.isNull[i] = true; + outputColVector.isNull[i] = true; for (int k = 0; k < inputColumns.length; k++) { ColumnVector cv = batch.cols[inputColumns[k]]; if ((cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, 0, cv); + outputColVector.isNull[i] = false; + outputColVector.setElement(i, 0, cv); break; } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, i, cv); + outputColVector.isNull[i] = false; + outputColVector.setElement(i, i, cv); break; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java index b2891a8..f6e9c8b 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java @@ -91,7 +91,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - // Handle null + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse); switch (primitiveCategory) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index e232555..24304e3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -32,6 +32,7 @@ import org.apache.hive.common.util.DateParser; import java.sql.Date; +import java.util.Arrays; public class VectorUDFDateAddColScalar extends VectorExpression { private static final long serialVersionUID = 1L; @@ -89,27 +90,41 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; switch (primitiveCategory) { case DATE: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateDate(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateDate(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateDate(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -130,22 +145,36 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateTimestamp(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (batch.selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateTimestamp(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateTimestamp(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
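+ // e.g. (hypothetical): date_add(col, 1) over {2018-01-01 12:00:00, null}
+ // yields {2018-01-02, null}; the null row's output slot is left untouched.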
outV.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -168,22 +197,36 @@ public void evaluate(VectorizedRowBatch batch) { case STRING: case CHAR: case VARCHAR: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + evaluateString(inputCol, outV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (batch.selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; evaluateString(inputCol, outV, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { evaluateString(inputCol, outV, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 0aaba26..c0fa1bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -30,6 +30,7 @@ import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; +import java.util.Arrays; public class VectorUDFDateAddScalarCol extends VectorExpression { @@ -130,26 +131,43 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; long baseDateDays = DateWritable.millisToDays(baseDate.getTime()); + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + evaluate(baseDateDays, inputCol.vector[0], outV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; evaluate(baseDateDays, inputCol.vector[i], outV, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { evaluate(baseDateDays, inputCol.vector[i], outV, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java index 982467e..0d794fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java @@ -89,6 +89,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse); LongColumnVector convertedVector1 = toDateArray(batch, inputTypeInfos[0], inputColVector1, dateVector1); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java index 97e3669..2a0fd28 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java @@ -35,6 +35,7 @@ import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.Arrays; public class VectorUDFDateDiffColScalar extends VectorExpression { private static final long serialVersionUID = 1L; @@ -92,8 +93,8 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory(); switch (primitiveCategory1) { @@ -134,22 +135,36 @@ public void evaluate(VectorizedRowBatch batch) { PrimitiveCategory primitiveCategory0 = ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory(); switch (primitiveCategory0) { case DATE: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateDate(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateDate(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateDate(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
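+ // e.g. (hypothetical): datediff(col, '2018-01-01') over {2018-01-03, null}
+ // yields {2, null}.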
outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -170,22 +185,36 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateTimestamp(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateTimestamp(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateTimestamp(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -208,22 +237,36 @@ public void evaluate(VectorizedRowBatch batch) { case STRING: case CHAR: case VARCHAR: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + evaluateString(inputCol, outV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; evaluateString(inputCol, outV, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { evaluateString(inputCol, outV, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java index c575c05..85cf72d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java @@ -33,6 +33,7 @@ import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.Arrays; public class VectorUDFDateDiffScalarCol extends VectorExpression { private static final long serialVersionUID = 1L; @@ -90,8 +91,8 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. 
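+ // Every branch below writes both the value and the isNull entry for each
+ // selected row, so stale state in the reused output column cannot leak through.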
+ outV.isRepeating = false; PrimitiveCategory primitiveCategory0 = ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory(); @@ -134,22 +135,36 @@ public void evaluate(VectorizedRowBatch batch) { ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory(); switch (primitiveCategory1) { case DATE: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateDate(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateDate(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateDate(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -170,22 +185,36 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateTimestamp(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateTimestamp(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateTimestamp(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -208,22 +237,36 @@ public void evaluate(VectorizedRowBatch batch) { case STRING: case CHAR: case VARCHAR: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + evaluateString(inputCol, outV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; evaluateString(inputCol, outV, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { evaluateString(inputCol, outV, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java index 9d72bdf..1f2d5cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java @@ -55,7 +55,10 @@ public void evaluate(VectorizedRowBatch batch) { // indexColumnVector includes the keys of Map indexColumnVector = batch.cols[indexColumnNum]; - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + int[] mapValueIndex; if (mapV.isRepeating) { if (mapV.isNull[0]) { @@ -71,9 +74,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.noNulls = false; } else { // the key is found in MapColumnVector, set the value - outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); outV.isNull[0] = false; - outV.noNulls = true; + outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); } outV.isRepeating = true; } else { @@ -97,8 +99,8 @@ private void setUnRepeatingOutVector(VectorizedRowBatch batch, MapColumnVector m outV.isNull[j] = true; outV.noNulls = false; } else { - outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); outV.isNull[j] = false; + outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); } } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java index e6a86ae..a7d730b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java @@ -50,7 +50,10 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector outV = batch.cols[outputColumnNum]; MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum]; - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
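+ * The flag is only ever cleared (on a null map or a missing key); it is
+ * never flipped back to true mid-batch.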
+ */ + int[] mapValueIndex; if (mapV.isRepeating) { if (mapV.isNull[0]) { @@ -65,7 +68,6 @@ public void evaluate(VectorizedRowBatch batch) { } else { // the key is found in MapColumnVector, set the value outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); - outV.noNulls = true; } } outV.isRepeating = true; @@ -77,8 +79,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[j] = true; outV.noNulls = false; } else { - outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); outV.isNull[j] = false; + outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); } } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java index 0507fa5..ca3845d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java @@ -52,14 +52,16 @@ public void evaluate(VectorizedRowBatch batch) { StructColumnVector structColumnVector = (StructColumnVector) batch.cols[structColumnNum]; ColumnVector fieldColumnVector = structColumnVector.fields[fieldIndex]; - outV.noNulls = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (structColumnVector.isRepeating) { if (structColumnVector.isNull[0]) { outV.isNull[0] = true; outV.noNulls = false; } else { - outV.setElement(0, 0, fieldColumnVector); outV.isNull[0] = false; + outV.setElement(0, 0, fieldColumnVector); } outV.isRepeating = true; } else { @@ -69,11 +71,10 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[j] = true; outV.noNulls = false; } else { - outV.setElement(j, j, fieldColumnVector); outV.isNull[j] = false; + outV.setElement(j, (fieldColumnVector.isRepeating ? 0 : j), fieldColumnVector); } } - outV.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java index 519a4e4..411a33f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; import java.util.Calendar; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -95,27 +96,41 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputColVec.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. 
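+ // Repeating inputs are resolved up front and return early, so the loops
+ // below only ever see flat (non-repeating) vectors.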
+ outV.isRepeating = false; LongColumnVector longColVector = (LongColumnVector) inputColVec; + if (inputColVec.isRepeating) { + if (inputColVec.noNulls || !inputColVec.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = getDateField(longColVector.vector[0]); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + if (inputColVec.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = getDateField(longColVector.vector[i]); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = getDateField(longColVector.vector[i]); } } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... outV.noNulls = false; + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java index c5762d1..2918546 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -106,11 +106,27 @@ public void evaluate(VectorizedRowBatch batch) { return; } - // true for all algebraic UDFs with no state - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + try { + outV.isNull[0] = false; + outV.vector[0] = getField(inputCol.vector[0], inputCol.start[0], inputCol.length[0]); + } catch (ParseException e) { + outV.noNulls = false; + outV.isNull[0] = true; + } + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for (int j = 0; j < n; j++) { int i = sel[j]; @@ -133,11 +149,11 @@ public void evaluate(VectorizedRowBatch batch) { } } } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + // Carefully handle NULLs... 
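+ // Rows can also become null below via a ParseException on a malformed input
+ // string, independent of the input null mask, so noNulls stays false either way.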
outV.noNulls = false; + if (selectedInUse) { for (int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java index 54cb5d8..740a00c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; import java.util.Calendar; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -95,27 +96,41 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputColVec.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec; + if (inputColVec.isRepeating) { + if (inputColVec.noNulls || !inputColVec.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = getTimestampField(timestampColVector, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + if (inputColVec.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = getTimestampField(timestampColVector, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = getTimestampField(timestampColVector, i); } } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
outV.noNulls = false; + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java index 6ebd7d3..18bacc5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java @@ -151,7 +151,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation) agg; if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { + if (inputColumn.noNulls || !inputColumn.isNull[0]) { valueProcessor.processValue(myagg, inputColumn, 0); } return; @@ -251,7 +251,11 @@ public void aggregateInputSelection( } } else { if (inputColumn.isRepeating) { - // All nulls, no-op for min/max + if (!inputColumn.isNull[0]) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + inputColumn, batchSize); + } } else { if (batch.selectedInUse) { iterateHasNullsSelectionWithAggregationSelection( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java index 8f1375e..b08bc32 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java @@ -123,7 +123,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation) agg; if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { + if (inputColumn.noNulls || !inputColumn.isNull[0]) { processValue(myagg, inputColumn, 0); } return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java index 888f5f0..77e751d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java @@ -284,9 +284,9 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation)agg; long[] vector = inputVector.vector; - + if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { myagg.value += vector[0]*batchSize; } return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64.java index 251de3a..a503445 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64.java @@ -333,7 +333,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) long[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64ToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64ToDecimal.java index 06e319b..ddc190c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64ToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64ToDecimal.java @@ -358,7 +358,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) long[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumTimestamp.java index 3ca5ee0..e542033 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumTimestamp.java @@ -297,13 +297,13 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation)agg; if (inputVector.isRepeating) { - if (inputVector.noNulls) { - if (myagg.isNull) { - myagg.isNull = false; - myagg.sum = 0; + if (inputVector.noNulls || !inputVector.isNull[0]) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + } + myagg.sum += inputVector.getDouble(0) * batchSize; } - myagg.sum += inputVector.getDouble(0) * batchSize; - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java index 7166c64..9515832 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java @@ -62,7 +62,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } ColumnVector colVector = batch.cols[inputColumnNum]; if (colVector.isRepeating) { - if (colVector.noNulls) { + if (colVector.noNulls || !colVector.isNull[0]) { count += size; } } else if (colVector.noNulls) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java index 85e5ebe..bd4896a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java @@ -72,7 +72,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { // We have a repeated value. The sum increases by value * batch.size. 
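+ // e.g. (hypothetical): a repeated value of 2.50 over a 4-row batch adds
+ // 2.50 * 4 = 10.00 to the running sum in a single multiply.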
temp.setFromLong(batch.size); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java index ed11a09..daba90c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java @@ -70,7 +70,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { firstValue.set(decimalColVector.vector[0]); isGroupResultNull = false; } @@ -86,6 +87,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc haveFirstValue = true; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // First value is repeated for all batches. DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; outputColVector.isRepeating = true; @@ -93,7 +98,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc outputColVector.noNulls = false; outputColVector.isNull[0] = true; } else { - outputColVector.noNulls = true; outputColVector.isNull[0] = false; outputColVector.vector[0].set(firstValue); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java index eb55792..b2cbdf6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java @@ -70,7 +70,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { lastValue.set(decimalColVector.vector[0]); isGroupResultNull = false; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java index 231cf9b..4d8c7fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java @@ -64,7 +64,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { if (isGroupResultNull) { max.set(decimalColVector.vector[0]); isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java index 6373c09..312c43c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java @@ -64,7 +64,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { if (isGroupResultNull) { min.set(decimalColVector.vector[0]); isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java index 36dd119..e899c36 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java @@ -66,7 +66,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { temp.setFromLong(batch.size); if (isGroupResultNull) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java index 9ceeb13..e457e32 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java @@ -51,7 +51,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; longColVector.isRepeating = true; - longColVector.noNulls = true; longColVector.isNull[0] = false; longColVector.vector[0] = denseRank; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java index 271a936..298de2d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java @@ -66,7 +66,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { // We have a repeated value. The sum increases by value * batch.size. 
if (isGroupResultNull) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java index 9f65de4..460fbe5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java @@ -66,7 +66,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { firstValue = doubleColVector.vector[0]; isGroupResultNull = false; } @@ -82,6 +83,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc haveFirstValue = true; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // First value is repeated for all batches. DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; outputColVector.isRepeating = true; @@ -89,7 +94,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc outputColVector.noNulls = false; outputColVector.isNull[0] = true; } else { - outputColVector.noNulls = true; outputColVector.isNull[0] = false; outputColVector.vector[0] = firstValue; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java index 8d28994..51d06a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java @@ -66,7 +66,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { lastValue = doubleColVector.vector[0]; isGroupResultNull = false; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java index 732369a..2bc4c6b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java @@ -60,7 +60,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { if (isGroupResultNull) { max = doubleColVector.vector[0]; isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java index 91c538d..6a422b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java @@ -60,7 +60,8 @@ public void 
evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { if (isGroupResultNull) { min = doubleColVector.vector[0]; isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java index 0c534d8..2ecc4ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java @@ -61,7 +61,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { if (isGroupResultNull) { // First aggregation calculation for group. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java index 66e8f98..f48df25 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java @@ -66,7 +66,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + if (longColVector.noNulls || !longColVector.isNull[0]) { // We have a repeated value. The sum increases by value * batch.size. if (isGroupResultNull) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java index 5151ecb..3deadb1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java @@ -66,7 +66,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { firstValue = longColVector.vector[0]; isGroupResultNull = false; } @@ -82,6 +83,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc haveFirstValue = true; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // First value is repeated for all batches. 
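+ // The group result goes out as a repeating column: slot 0 carries the one
+ // value (or the one null flag) that stands for every row in the batch.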
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; outputColVector.isRepeating = true; @@ -89,7 +94,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc outputColVector.noNulls = false; outputColVector.isNull[0] = true; } else { - outputColVector.noNulls = true; outputColVector.isNull[0] = false; outputColVector.vector[0] = firstValue; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java index fa8e880..0a5df51 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java @@ -66,7 +66,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { lastValue = longColVector.vector[0]; isGroupResultNull = false; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java index b60b03b..0e7eb07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java @@ -60,7 +60,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { if (isGroupResultNull) { max = longColVector.vector[0]; isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java index 26ea0df..df31c51 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java @@ -60,7 +60,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { if (isGroupResultNull) { min = longColVector.vector[0]; isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java index ce0acb5..9402218 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java @@ -61,7 +61,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + if (longColVector.noNulls || !longColVector.isNull[0]) { if (isGroupResultNull) { // First aggregation calculation for group. 
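Note on the evaluator hunks above: every VectorPTFEvaluator change (first/last value, min, max, sum, avg, for both long and double) makes the same one-line correction. A repeating ColumnVector carries its single logical value in slot 0, and noNulls == false only means the isNull array must be consulted; it does not by itself mean the value is null. The old test, "if (colVector.noNulls)", conflated those two states and silently skipped valid repeated values whenever noNulls happened to be false. A minimal sketch of the corrected rule follows; RepeatingNullCheck is a hypothetical helper written for illustration and is not part of this patch, while the isRepeating/noNulls/isNull fields are the real ColumnVector API.

    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

    public final class RepeatingNullCheck {  // hypothetical helper, not in this patch

      private RepeatingNullCheck() {
      }

      /*
       * True when a repeating vector's single entry (slot 0) is non-null:
       * either the whole column is declared null-free, or the entry's own
       * isNull flag is off. Both conditions must be checked; noNulls == false
       * alone proves nothing about slot 0.
       */
      public static boolean repeatedValueIsNonNull(ColumnVector colVector) {
        assert colVector.isRepeating;
        return colVector.noNulls || !colVector.isNull[0];
      }
    }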
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java index f7080e5..34add61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java @@ -50,9 +50,12 @@ public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, VectorExpression in public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { evaluateInputExpr(batch); + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; longColVector.isRepeating = true; - longColVector.noNulls = true; longColVector.isNull[0] = false; longColVector.vector[0] = rank; groupCount += batch.size; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java index f23a8b3..607d34a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java @@ -178,6 +178,11 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } private void fillGroupResults(VectorizedRowBatch batch) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + for (VectorPTFEvaluatorBase evaluator : evaluators) { final int outputColumnNum = evaluator.getOutputColumnNum(); if (evaluator.streamsResult()) { @@ -190,7 +195,6 @@ private void fillGroupResults(VectorizedRowBatch batch) { if (isGroupResultNull) { outputColVector.noNulls = false; } else { - outputColVector.noNulls = true; switch (evaluator.getResultColumnVectorType()) { case LONG: ((LongColumnVector) outputColVector).vector[0] = evaluator.getLongGroupResult(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java index c9717ba..39fab2c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java @@ -434,6 +434,7 @@ private boolean isPartitionChanged(VectorizedRowBatch batch) { return true; } if (isNull) { + // NULL does equal NULL here. continue; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index a1a1282..82b7a15 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -140,7 +140,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - batch.cols[outputColumnNum].noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ /* If all input columns are repeating, just evaluate function * for row 0 in the batch and set output repeating. 
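Note on the hunks above: the FirstValue, Rank, VectorPTFGroupBatches, and VectorUDFAdaptor changes all delete an "outputColVector.noNulls = true;" assignment rather than guarding it, which is what the repeated "Do careful maintenance of the outputColVector.noNulls flag" comment refers to. Since noNulls == true licenses readers to skip the isNull array entirely, raising the flag mid-batch can erase a null that an earlier write to the same column legitimately recorded. Within a batch the flag should therefore only ever move from true to false; a non-null write just clears its own isNull entry. Below is a minimal sketch of that discipline, assuming (as VectorizedRowBatch.reset() does) that each output column starts the batch with noNulls == true; the helper itself is illustrative, not code from this patch.

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public final class NoNullsDiscipline {  // hypothetical helper, not in this patch

      private NoNullsDiscipline() {
      }

      // Write a repeated group result without ever raising noNulls.
      public static void writeRepeatedGroupResult(
          LongColumnVector outputColVector, boolean isGroupResultNull, long groupResult) {
        outputColVector.isRepeating = true;
        if (isGroupResultNull) {
          outputColVector.noNulls = false;    // lowering the flag is always safe
          outputColVector.isNull[0] = true;
        } else {
          outputColVector.isNull[0] = false;  // noNulls is deliberately left alone
          outputColVector.vector[0] = groupResult;
        }
      }
    }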
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 190771e..81b8826 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1701,7 +1701,7 @@ private boolean validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInf private boolean validateAndVectorizeMapOperators(MapWork mapWork, TableScanOperator tableScanOperator, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { - LOG.info("Validating and vectorizing MapWork..."); + LOG.info("Validating and vectorizing MapWork... (vectorizedVertexNum " + vectorizedVertexNum + ")"); // Set "global" member indicating where to store "not vectorized" information if necessary. currentBaseWork = mapWork; @@ -1905,7 +1905,7 @@ private boolean validateAndVectorizeReduceOperators(ReduceWork reduceWork, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { - LOG.info("Validating and vectorizing ReduceWork..."); + LOG.info("Validating and vectorizing ReduceWork... (vectorizedVertexNum " + vectorizedVertexNum + ")"); Operator newVectorReducer; try { @@ -4101,9 +4101,6 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { for (int i = 0; i < size; i++) { ExprNodeDesc expr = colList.get(i); VectorExpression ve = vContext.getVectorExpression(expr); - if (ve.getOutputColumnNum() == -1) { - fake++; - } projectedOutputColumns[i] = ve.getOutputColumnNum(); if (ve instanceof IdentityExpression) { // Suppress useless evaluation. diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java index c646bf1..ea19e93 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java @@ -188,7 +188,6 @@ public void testLongColumnColumnIfExpr() { assertEquals(2, r.vector[1]); assertEquals(-3, r.vector[2]); assertEquals(-4, r.vector[3]); - assertEquals(true, r.noNulls); assertEquals(false, r.isRepeating); // verify when first argument (boolean flags) is repeating @@ -230,7 +229,6 @@ public void testLongColumnColumnIfExpr() { assertEquals(2, r.vector[1]); assertEquals(3, r.vector[2]); assertEquals(-4, r.vector[3]); - assertEquals(true, r.noNulls); assertEquals(false, r.isRepeating); // test when second argument has nulls @@ -308,7 +306,6 @@ public void testDoubleColumnColumnIfExpr() { assertEquals(true, 2d == r.vector[1]); assertEquals(true, -3d == r.vector[2]); assertEquals(true, -4d == r.vector[3]); - assertEquals(true, r.noNulls); assertEquals(false, r.isRepeating); } @@ -480,7 +477,6 @@ public void testIfExprStringColumnStringScalar() { assertTrue(getString(r, 1).equals("scalar")); assertTrue(getString(r, 2).equals("arg2_2")); assertTrue(getString(r, 3).equals("arg2_3")); - assertTrue(r.noNulls); // test for null input strings batch = getBatch1Long3BytesVectors(); @@ -504,7 +500,6 @@ public void testIfExprStringScalarStringColumn() { assertTrue(getString(r, 1).equals("arg3_1")); assertTrue(getString(r, 2).equals("scalar")); assertTrue(getString(r, 3).equals("scalar")); - assertTrue(r.noNulls); // test for null input strings batch = getBatch1Long3BytesVectors(); diff --git 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java index bd5a6b7..a60b9e4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java @@ -67,7 +67,6 @@ public void testLongColOrLongCol() { expr.evaluate(batch); // spot check - Assert.assertTrue(outCol.noNulls); Assert.assertEquals(0, outCol.vector[0]); Assert.assertEquals(1, outCol.vector[1]); Assert.assertEquals(1, outCol.vector[2]); @@ -125,7 +124,6 @@ public void testLongColAndLongCol() { expr.evaluate(batch); // spot check - Assert.assertTrue(outCol.noNulls); Assert.assertEquals(0, outCol.vector[0]); Assert.assertEquals(0, outCol.vector[1]); Assert.assertEquals(0, outCol.vector[2]); @@ -207,7 +205,6 @@ public void testBooleanNot() { batch.cols[0].noNulls = true; expr.evaluate(batch); Assert.assertFalse(outCol.isRepeating); - Assert.assertTrue(outCol.noNulls); Assert.assertEquals(1, outCol.vector[0]); Assert.assertEquals(0, outCol.vector[2]); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java index ca3c259..202f18c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java @@ -3757,7 +3757,6 @@ public void testStringColCompareStringColProjection() { expr.evaluate(batch); Assert.assertEquals(4, batch.size); outVector = ((LongColumnVector) batch.cols[3]).vector; - Assert.assertFalse(batch.cols[3].noNulls); Assert.assertFalse(batch.cols[3].isNull[0]); Assert.assertEquals(1, outVector[0]); Assert.assertFalse(batch.cols[3].isNull[1]); @@ -3821,7 +3820,6 @@ public void testStringColCompareStringColProjection() { expr.evaluate(batch); outVector = ((LongColumnVector) batch.cols[3]).vector; Assert.assertEquals(4, batch.size); - Assert.assertFalse(batch.cols[3].noNulls); Assert.assertFalse(batch.cols[3].isNull[0]); Assert.assertEquals(1, outVector[0]); Assert.assertFalse(batch.cols[3].isNull[1]); @@ -4064,7 +4062,6 @@ public void testColLower() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatchMixedCase(); @@ -4124,7 +4121,6 @@ public void testStringLength() { expr.evaluate(batch); outCol = (LongColumnVector) batch.cols[1]; Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); Assert.assertEquals(7, outCol.vector[0]); // length of "mixedUp" // no nulls, is repeating @@ -4486,7 +4482,6 @@ public void testColConcatStringScalar() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4549,7 +4544,6 @@ public void testColConcatCharScalar() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4612,7 +4606,6 @@ public void testColConcatVarCharScalar() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); 
Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4675,7 +4668,6 @@ public void testStringScalarConcatCol() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4738,7 +4730,6 @@ public void testCharScalarConcatCol() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4801,7 +4792,6 @@ public void testVarCharScalarConcatCol() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4923,7 +4913,6 @@ public void testColConcatCol() { batch.cols[0].noNulls = true; expr.evaluate(batch); Assert.assertEquals(false, outCol.isRepeating); - Assert.assertEquals(true, outCol.noNulls); cmp = StringExpr.compare(red, 0, red.length, outCol.vector[2], outCol.start[2], outCol.length[2]); Assert.assertEquals(0, cmp); @@ -5015,7 +5004,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5043,7 +5031,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr = new StringSubstrColStart(0, 1, 1); expr.evaluate(batch); Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, @@ -5128,7 +5115,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // 3nd char starts from index 3 and total length should be 7 bytes as max is 10 @@ -5153,7 +5139,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr = new StringSubstrColStart(0, 2, 1); expr.evaluate(batch); Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // the result is the last 1 character, which occupies 4 bytes @@ -5190,7 +5175,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr.evaluate(batch); BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); byte[] expected = "string".getBytes("UTF-8"); Assert.assertEquals(0, @@ -5218,7 +5202,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr = new StringSubstrColStartLen(0, -6, 6, 1); expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(3, batch.size); @@ -5250,7 +5233,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { outCol = (BytesColumnVector) batch.cols[1]; expr.evaluate(batch); Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5280,7 +5262,6 @@ public void 
testSubstrStartLen() throws UnsupportedEncodingException { outCol = (BytesColumnVector) batch.cols[1]; expr.evaluate(batch); Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5310,7 +5291,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5391,7 +5371,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr.evaluate(batch); Assert.assertEquals(1, batch.size); Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // 3rd char starts at index 3, and with length 2 it is covering the rest of the array. @@ -5415,7 +5394,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(1, batch.size); Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // 2nd substring index refers to the 6th index (last char in the array) diff --git ql/src/test/queries/clientpositive/vector_date_1.q ql/src/test/queries/clientpositive/vector_date_1.q index 0055973..bb515b1 100644 --- ql/src/test/queries/clientpositive/vector_date_1.q +++ ql/src/test/queries/clientpositive/vector_date_1.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; drop table if exists vector_date_1; create table vector_date_1 (dt1 date, dt2 date) stored as orc; @@ -13,8 +14,10 @@ insert into table vector_date_1 insert into table vector_date_1 select date '2001-01-01', date '2001-06-01' from src limit 1; +select * from vector_date_1 order by dt1, dt2; + -- column-to-column comparison in select clause -explain +explain vectorization detail select dt1, dt2, -- should be all true @@ -41,7 +44,7 @@ select dt2 > dt1 from vector_date_1 order by dt1; -explain +explain vectorization detail select dt1, dt2, -- should be all false @@ -69,7 +72,7 @@ select from vector_date_1 order by dt1; -- column-to-literal/literal-to-column comparison in select clause -explain +explain vectorization detail select dt1, -- should be all true @@ -96,7 +99,7 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1; -explain +explain vectorization detail select dt1, -- should all be false @@ -126,7 +129,7 @@ from vector_date_1 order by dt1; -- column-to-column comparisons in predicate -- all rows with non-null dt1 should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -153,7 +156,7 @@ order by dt1; -- column-to-literal/literal-to-column comparison in predicate -- only a single row should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -182,7 +185,7 @@ where and date '1970-01-01' <= dt1 order by dt1; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01'); SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01'); diff --git ql/src/test/queries/clientpositive/vector_interval_1.q ql/src/test/queries/clientpositive/vector_interval_1.q index f4f0024..3702734 
100644 --- ql/src/test/queries/clientpositive/vector_interval_1.q +++ ql/src/test/queries/clientpositive/vector_interval_1.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; drop table if exists vector_interval_1; create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 string) stored as orc; @@ -11,6 +12,8 @@ insert into vector_interval_1 insert into vector_interval_1 select null, null, null, null from src limit 1; +select * from vector_interval_1; + -- constants/cast from string explain vectorization expression select diff --git ql/src/test/queries/clientpositive/vector_ptf_part_simple.q ql/src/test/queries/clientpositive/vector_ptf_part_simple.q index fc9f9eb..5615bca 100644 --- ql/src/test/queries/clientpositive/vector_ptf_part_simple.q +++ ql/src/test/queries/clientpositive/vector_ptf_part_simple.q @@ -531,19 +531,37 @@ count(*) over(partition by p_mfgr order by p_name) as cs from vector_ptf_part_simple_orc; +explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc; + select p_mfgr, p_retailprice, rank() over(partition by p_mfgr) as r from vector_ptf_part_simple_orc; +explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc; + select p_mfgr, p_retailprice, rank() over(partition by p_mfgr order by p_name) as r from vector_ptf_part_simple_orc; +explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc; select p_mfgr, p_name, p_retailprice, rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r from vector_ptf_part_simple_orc; +explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc; select p_mfgr, p_name, p_retailprice, rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r diff --git ql/src/test/queries/clientpositive/vector_udf2.q ql/src/test/queries/clientpositive/vector_udf2.q index e62af6a..bd8e875 100644 --- ql/src/test/queries/clientpositive/vector_udf2.q +++ ql/src/test/queries/clientpositive/vector_udf2.q @@ -33,10 +33,16 @@ create temporary table HIVE_14349 (a string) stored as orc; insert into HIVE_14349 values('XYZa'), ('badXYZa'); +explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a%'; + select * from HIVE_14349 where a LIKE 'XYZ%a%'; insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc'); +explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a_'; + select * from HIVE_14349 where a LIKE 'XYZ%a_'; drop table HIVE_14349; diff --git ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q index eeb5ab8..5c052a1 100644 --- ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q +++ ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q @@ -1,5 +1,5 @@ set hive.mapred.mode=nonstrict; -SET 
hive.vectorized.execution.enabled = true; +SET hive.vectorized.execution.enabled = false; SET hive.int.timestamp.conversion.in.seconds=false; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vectorization_nested_udf.q ql/src/test/queries/clientpositive/vectorization_nested_udf.q index da8f99c..25a25df 100644 --- ql/src/test/queries/clientpositive/vectorization_nested_udf.q +++ ql/src/test/queries/clientpositive/vectorization_nested_udf.q @@ -1,5 +1,8 @@ +set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc; SELECT SUM(abs(ctinyint)) from alltypesorc; diff --git ql/src/test/queries/clientpositive/vectorized_case.q ql/src/test/queries/clientpositive/vectorized_case.q index 99d7cfc..8aad2b5 100644 --- ql/src/test/queries/clientpositive/vectorized_case.q +++ ql/src/test/queries/clientpositive/vectorized_case.q @@ -1,8 +1,8 @@ set hive.explain.user=false; set hive.fetch.task.conversion=none; -set hive.vectorized.execution.enabled = true -; -explain vectorization expression +set hive.vectorized.execution.enabled = true; + +explain vectorization detail select csmallint, case @@ -37,7 +37,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 ; -explain vectorization expression +explain vectorization detail select csmallint, case @@ -55,7 +55,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 ; -explain vectorization expression +explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -64,7 +64,7 @@ select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc; -explain vectorization expression +explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -79,19 +79,19 @@ CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC; INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0); --for length=3 -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; --for length=2 and the expr2 is null -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; --for length=2 and the expr3 is null -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1; @@ -102,19 +102,105 @@ CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC; INSERT INTO test_2 VALUES (3,1),(2,2),(1,3); --for length=3 -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2; SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2; ---for length=2 and the expression2 is null -EXPLAIN VECTORIZATION EXPRESSION +--for length=2 and the expression2 is null +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; ---for length=2 and the expression3 is null -EXPLAIN 
VECTORIZATION EXPRESSION +--for length=2 and the expression3 is null +EXPLAIN VECTORIZATION DETAIL +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; + SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; -SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; \ No newline at end of file + +select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a; + + +select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a; + + diff --git ql/src/test/results/clientpositive/llap/vector_const.q.out ql/src/test/results/clientpositive/llap/vector_const.q.out new file mode 100644 index 0000000..964ddcc --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_const.q.out @@ -0,0 +1,66 @@ +PREHOOK: query: CREATE TEMPORARY TABLE varchar_const_1 (c1 int) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_const_1 +POSTHOOK: query: CREATE TEMPORARY TABLE varchar_const_1 (c1 int) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_const_1 +PREHOOK: query: INSERT INTO varchar_const_1 values(42) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@varchar_const_1 +POSTHOOK: query: INSERT INTO varchar_const_1 values(42) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@varchar_const_1 +POSTHOOK: Lineage: varchar_const_1.c1 SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +POSTHOOK: 
type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: varchar_const_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'FF' (type: varchar(4)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_const_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_const_1 +#### A masked pattern was here #### +FF diff --git ql/src/test/results/clientpositive/llap/vector_date_1.q.out ql/src/test/results/clientpositive/llap/vector_date_1.q.out index 1e3d2b3..bacd667 100644 --- ql/src/test/results/clientpositive/llap/vector_date_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_date_1.q.out @@ -22,6 +22,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 EXPRESSION [] POSTHOOK: Lineage: vector_date_1.dt2 EXPRESSION [] +_col0 _col1 PREHOOK: query: insert into table vector_date_1 select date '1999-12-31', date '2000-01-01' from src limit 1 PREHOOK: type: QUERY @@ -34,6 +35,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] +_c0 _c1 PREHOOK: query: insert into table vector_date_1 select date '2001-01-01', date '2001-06-01' from src limit 1 PREHOOK: type: QUERY @@ -46,7 +48,20 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] -PREHOOK: query: explain +_c0 _c1 +PREHOOK: query: select * from vector_date_1 order by dt1, dt2 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_date_1 order by dt1, dt2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +vector_date_1.dt1 vector_date_1.dt2 +NULL NULL +1999-12-31 2000-01-01 +2001-01-01 2001-06-01 +PREHOOK: query: explain vectorization detail select dt1, dt2, -- should be all true @@ -60,7 +75,7 @@ select dt2 > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all true @@ -74,6 +89,11 @@ select dt2 > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -91,26 +111,75 @@ STAGE 
PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 = dt1) (type: boolean), (dt1 <> dt2) (type: boolean), (dt1 <= dt1) (type: boolean), (dt1 <= dt2) (type: boolean), (dt1 < dt2) (type: boolean), (dt2 >= dt2) (type: boolean), (dt2 >= dt1) (type: boolean), (dt2 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: LongColEqualLongColumn(col 0:date, col 0:date) -> 3:boolean, LongColNotEqualLongColumn(col 0:date, col 1:date) -> 4:boolean, LongColLessEqualLongColumn(col 0:date, col 0:date) -> 5:boolean, LongColLessEqualLongColumn(col 0:date, col 1:date) -> 6:boolean, LongColLessLongColumn(col 0:date, col 1:date) -> 7:boolean, LongColGreaterEqualLongColumn(col 1:date, col 1:date) -> 8:boolean, LongColGreaterEqualLongColumn(col 1:date, col 0:date) -> 9:boolean, LongColGreaterLongColumn(col 1:date, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:date, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean, VALUE._col8:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), 
VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -153,10 +222,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 true true true true true true true true 2001-01-01 2001-06-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -170,7 +240,7 @@ select dt2 < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -184,6 +254,11 @@ select dt2 < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -201,26 +276,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 <> dt1) (type: boolean), (dt1 = dt2) (type: boolean), (dt1 < dt1) (type: boolean), (dt1 >= dt2) (type: boolean), (dt1 > dt2) (type: boolean), (dt2 > dt2) (type: boolean), (dt2 <= dt1) (type: boolean), (dt2 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: LongColNotEqualLongColumn(col 0:date, col 0:date) -> 3:boolean, LongColEqualLongColumn(col 0:date, col 1:date) -> 4:boolean, LongColLessLongColumn(col 0:date, col 0:date) -> 5:boolean, LongColGreaterEqualLongColumn(col 0:date, col 1:date) -> 6:boolean, LongColGreaterLongColumn(col 0:date, col 1:date) -> 7:boolean, LongColGreaterLongColumn(col 1:date, col 1:date) -> 8:boolean, LongColLessEqualLongColumn(col 1:date, col 0:date) -> 9:boolean, LongColLessLongColumn(col 1:date, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:date, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean, VALUE._col8:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -263,10 +387,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 false false false false false false false false 2001-01-01 2001-06-01 false false false false false false false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should be all true @@ -280,7 +405,7 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should be all true @@ -294,6 +419,11 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -311,26 +441,75 @@ STAGE PLANS: TableScan alias: vector_date_1 
Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), (dt1 <> 1970-01-01) (type: boolean), (dt1 >= 1970-01-01) (type: boolean), (dt1 > 1970-01-01) (type: boolean), (dt1 <= 2100-01-01) (type: boolean), (dt1 < 2100-01-01) (type: boolean), (1970-01-01 <> dt1) (type: boolean), (1970-01-01 <= dt1) (type: boolean), (1970-01-01 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: DateColNotEqualDateScalar(col 0:date, date 1970-01-01) -> 3:boolean, DateColGreaterEqualDateScalar(col 0:date, date 1970-01-01) -> 4:boolean, DateColGreaterDateScalar(col 0:date, date 1970-01-01) -> 5:boolean, DateColLessEqualDateScalar(col 0:date, date 2100-01-01) -> 6:boolean, DateColLessDateScalar(col 0:date, date 2100-01-01) -> 7:boolean, DateScalarNotEqualDateColumn(date 1970-01-01, col 0:date) -> 8:boolean, DateScalarLessEqualDateColumn(date 1970-01-01, col 0:date) -> 9:boolean, DateScalarLessDateColumn(date 1970-01-01, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:boolean, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), 
VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -373,10 +552,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 true true true true true true true true 2001-01-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -390,7 +570,7 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -404,6 +584,11 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -421,26 +606,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), (dt1 = 1970-01-01) (type: boolean), (dt1 <= 1970-01-01) (type: boolean), (dt1 < 1970-01-01) (type: boolean), (dt1 >= 2100-01-01) (type: boolean), (dt1 > 2100-01-01) (type: boolean), (1970-01-01 = dt1) (type: boolean), (1970-01-01 >= dt1) (type: boolean), (1970-01-01 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: DateColEqualDateScalar(col 0:date, date 1970-01-01) -> 3:boolean, DateColLessEqualDateScalar(col 0:date, date 1970-01-01) -> 4:boolean, DateColLessDateScalar(col 0:date, date 1970-01-01) -> 5:boolean, DateColGreaterEqualDateScalar(col 0:date, date 2100-01-01) -> 6:boolean, DateColGreaterDateScalar(col 0:date, date 2100-01-01) -> 7:boolean, DateScalarEqualDateColumn(date 1970-01-01, col 0:date) -> 8:boolean, DateScalarGreaterEqualDateColumn(date 1970-01-01, col 0:date) -> 9:boolean, DateScalarGreaterDateColumn(date 1970-01-01, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:boolean, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -483,10 +717,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 false false false false false false false false 2001-01-01 false false false false false false false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -499,7 +734,7 @@ where and dt2 >= dt1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -512,6 +747,11 @@ where and dt2 >= dt1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -529,29 +769,81 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0:date, col 0:date), FilterLongColNotEqualLongColumn(col 0:date, col 1:date), FilterLongColLessLongColumn(col 0:date, col 1:date), FilterLongColLessEqualLongColumn(col 0:date, col 1:date), FilterLongColGreaterLongColumn(col 1:date, col 0:date), FilterLongColGreaterEqualLongColumn(col 1:date, col 0:date)) predicate: ((dt1 < dt2) and (dt1 <= dt2) and (dt1 <> dt2) and (dt1 = dt1) and (dt2 > dt1) and (dt2 >= dt1)) (type: boolean) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dt1 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -592,9 +884,10 @@ order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 1999-12-31 2000-01-01 2001-01-01 2001-06-01 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -609,7 +902,7 @@ where and date '1970-01-01' <= dt1 order by dt1 PREHOOK: 
type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -624,6 +917,11 @@ where and date '1970-01-01' <= dt1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -638,15 +936,30 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDateScalarEqualDateColumn(val 11323, col 0:date), FilterDateColNotEqualDateScalar(col 0:date, val 0), FilterDateScalarNotEqualDateColumn(val 0, col 0:date)) predicate: ((1970-01-01 <> dt1) and (2001-01-01 = dt1) and (dt1 <> 1970-01-01)) (type: boolean) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 2001-01-01 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 1] + selectExpressions: ConstantVectorExpression(val 11323) -> 3:date Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -654,6 +967,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Stage: Stage-0 Fetch Operator @@ -693,13 +1021,15 @@ order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 2001-01-01 2001-06-01 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01') PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01') POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -720,6 +1050,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -756,6 +1087,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + 
dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -771,6 +1108,7 @@ POSTHOOK: query: SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 2001-01-01 PREHOOK: query: drop table vector_date_1 PREHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/llap/vector_empty_where.q.out ql/src/test/results/clientpositive/llap/vector_empty_where.q.out new file mode 100644 index 0000000..494c5c9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_empty_where.q.out @@ -0,0 +1,652 @@ +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cstring1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cstring1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastStringToBoolean(col 6) -> 13:boolean) + predicate: cstring1 (type: string) + Statistics: Num rows: 6144 Data size: 449620 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 6144 Data size: 449620 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cstring1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cstring1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +6041 +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cint +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cint +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked 
pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastLongToBooleanViaLongToLong(col 2:int) -> 13:boolean) + predicate: cint (type: int) + Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 
(type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +6082 +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cfloat +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cfloat +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 13:boolean) + predicate: cfloat (type: float) + Statistics: Num rows: 6144 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 6144 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + 
limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cfloat +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cfloat +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3022 +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where ctimestamp1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where ctimestamp1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 528216 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastTimestampToBoolean(col 8:timestamp) -> 13:boolean) + predicate: ctimestamp1 (type: timestamp) + Statistics: Num rows: 6144 Data size: 264108 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 6144 Data size: 264108 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: 
VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3022 diff --git ql/src/test/results/clientpositive/llap/vector_gather_stats.q.out ql/src/test/results/clientpositive/llap/vector_gather_stats.q.out new file mode 100644 index 0000000..e777242 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_gather_stats.q.out @@ -0,0 +1,108 @@ +PREHOOK: query: create table cd +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cd +POSTHOOK: query: create table cd +( + cd_demo_sk int, + cd_gender string, + 
cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cd +PREHOOK: query: alter table cd add partition (cd_education_status='Primary') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@cd +POSTHOOK: query: alter table cd add partition (cd_education_status='Primary') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@cd +POSTHOOK: Output: default@cd@cd_education_status=Primary +PREHOOK: query: insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 0, 0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: query: insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 0, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_credit_rating SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_demo_sk SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_college_count SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_count SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_employed_count SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_gender SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_marital_status SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_purchase_estimate SCRIPT [] +PREHOOK: query: explain vectorization detail +analyze table cd partition (cd_education_status) compute statistics +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +analyze table cd partition (cd_education_status) compute statistics +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: cd + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: TABLESCAN operator: gather stats not supported + vectorized: false + + Stage: Stage-2 + Stats Work + Basic Stats Work: + +PREHOOK: query: analyze table cd partition (cd_education_status) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@cd +PREHOOK: Input: default@cd@cd_education_status=Primary +PREHOOK: Output: default@cd +PREHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: query: analyze table cd partition (cd_education_status) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cd +POSTHOOK: Input: default@cd@cd_education_status=Primary +POSTHOOK: Output: default@cd +POSTHOOK: Output: default@cd@cd_education_status=Primary diff --git ql/src/test/results/clientpositive/llap/vector_if_expr_2.q.out 
ql/src/test/results/clientpositive/llap/vector_if_expr_2.q.out new file mode 100644 index 0000000..f4baa69 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_if_expr_2.q.out @@ -0,0 +1,136 @@ +PREHOOK: query: drop table if exists foo +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists foo +POSTHOOK: type: DROPTABLE +PREHOOK: query: create temporary table foo (x int, y int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create temporary table foo (x int, y int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@foo +POSTHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.x SCRIPT [] +POSTHOOK: Lineage: foo.y SCRIPT [] +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +select x, IF(x > 0,y,0) from foo order by x +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +select x, IF(x > 0,y,0) from foo order by x +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: foo + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Select Operator + expressions: x (type: int), if((x > 0), y, 0) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + selectExpressions: IfExprLongColumnLongScalar(col 3:boolean, col 1:int, val 0)(children: LongColGreaterLongScalar(col 0:int, val 0) -> 3:boolean) -> 4:int + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select x, IF(x > 0,y,0) from foo order by x +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +1 1 +2 NULL +3 1 +PREHOOK: query: select x, IF(x > 0,y,0) from foo order by x +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +1 1 +2 NULL +3 1 diff --git ql/src/test/results/clientpositive/llap/vector_interval_1.q.out ql/src/test/results/clientpositive/llap/vector_interval_1.q.out index 1be7232..7d891db 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_1.q.out @@ -24,6 +24,7 @@ POSTHOOK: Lineage: vector_interval_1.dt SIMPLE [] POSTHOOK: Lineage: vector_interval_1.str1 SIMPLE [] POSTHOOK: Lineage: vector_interval_1.str2 SIMPLE [] POSTHOOK: Lineage: vector_interval_1.ts SIMPLE [] +_c0 _c1 _c2 _c3 PREHOOK: query: insert into vector_interval_1 select null, null, null, null from src limit 1 PREHOOK: type: QUERY @@ -38,6 +39,18 @@ POSTHOOK: Lineage: vector_interval_1.dt EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] +_col0 _col1 _col2 _col3 +PREHOOK: query: select * from vector_interval_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_interval_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +vector_interval_1.ts vector_interval_1.dt vector_interval_1.str1 vector_interval_1.str2 +2001-01-01 01:02:03 2001-01-01 1-2 1 2:3:4 +NULL NULL NULL NULL PREHOOK: query: explain vectorization expression select str1, @@ -52,6 +65,7 @@ select interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1 POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -155,6 +169,7 @@ from vector_interval_1 order by str1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +str1 _c1 _c2 _c3 _c4 NULL 1-2 NULL 1 02:03:04.000000000 NULL 1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000 PREHOOK: query: explain vectorization expression @@ -179,6 +194,7 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 
order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -290,6 +306,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 NULL 2-4 NULL NULL 0-0 NULL NULL 2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0 PREHOOK: query: explain vectorization expression @@ -314,6 +331,7 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -425,6 +443,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL 2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: explain vectorization expression @@ -461,6 +480,7 @@ select dt - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -583,6 +603,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56 PREHOOK: query: explain vectorization expression @@ -619,6 +640,7 @@ select ts - interval_day_time(str2) from vector_interval_1 order by ts POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -741,6 +763,7 @@ from vector_interval_1 order by ts POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +ts _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59 PREHOOK: query: explain vectorization expression @@ -759,6 +782,7 @@ select ts - timestamp '2001-01-01 01:02:03' from vector_interval_1 order by ts POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -863,6 +887,7 @@ from vector_interval_1 order by ts POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +ts _c1 _c2 _c3 NULL NULL NULL NULL 2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: explain vectorization expression @@ -881,6 +906,7 @@ select dt - date '2001-01-01' from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -985,6 +1011,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: 
default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 NULL NULL NULL NULL 2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: explain vectorization expression @@ -1009,6 +1036,7 @@ select date '2001-01-01' - ts from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -1119,5 +1147,6 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 NULL NULL NULL NULL NULL NULL NULL 2001-01-01 0 01:02:03.000000000 0 01:02:03.000000000 0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 diff --git ql/src/test/results/clientpositive/llap/vector_join.q.out ql/src/test/results/clientpositive/llap/vector_join.q.out new file mode 100644 index 0000000..94c0290 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_join.q.out @@ -0,0 +1,104 @@ +PREHOOK: query: DROP TABLE IF EXISTS test1_vc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test1_vc +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS test2_vc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test2_vc +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test1_vc + ( + id string) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1_vc +POSTHOOK: query: CREATE TABLE test1_vc + ( + id string) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1_vc +PREHOOK: query: CREATE TABLE test2_vc( + id string + ) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' + ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2_vc +POSTHOOK: query: CREATE TABLE test2_vc( + id string + ) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' + ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2_vc +PREHOOK: query: SELECT cr.id1 , +cr.id2 +FROM +(SELECT t1.id id1, + t2.id id2 + from + (select * from test1_vc ) t1 + left outer join test2_vc t2 + on t1.id=t2.id) cr +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_vc +PREHOOK: Input: default@test2_vc +#### A 
masked pattern was here #### +POSTHOOK: query: SELECT cr.id1 , +cr.id2 +FROM +(SELECT t1.id id1, + t2.id id2 + from + (select * from test1_vc ) t1 + left outer join test2_vc t2 + on t1.id=t2.id) cr +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_vc +POSTHOOK: Input: default@test2_vc +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out new file mode 100644 index 0000000..5e168a9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: drop table if exists char_part_tbl1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_part_tbl2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab +POSTHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab +PREHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@studenttab +POSTHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@studenttab +POSTHOOK: Lineage: studenttab.age SCRIPT [] +POSTHOOK: Lineage: studenttab.gpa SCRIPT [] +POSTHOOK: Lineage: studenttab.name SCRIPT [] +PREHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl1 +POSTHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl1 +PREHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl2 +POSTHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl2 +PREHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] 
+POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: show partitions char_tbl1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl1 +POSTHOOK: query: show partitions char_tbl1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl1 +gpa=2.5 +gpa=3.5 +PREHOOK: query: show partitions char_tbl2 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl2 +POSTHOOK: query: show partitions char_tbl2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl2 +gpa=3 +gpa=3.5 +PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
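[Editor's aside, not part of the patch: the new golden file vector_join_part_col_char.q.out is easier to review next to the script that generates it. Below is a minimal sketch of the corresponding vector_join_part_col_char.q, assembled from the PREHOOK: query records in this hunk; the SET lines are assumptions about the usual qtest preamble and are not confirmed by this diff.

-- Hypothetical reconstruction; all statements are copied from the
-- PREHOOK: query records above. The SET lines are assumed, not from the diff.
set hive.support.concurrency=true;                                    -- assumption (transactional table)
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;  -- assumption
set hive.vectorized.execution.enabled=true;                           -- assumption

drop table if exists char_part_tbl1;
drop table if exists char_part_tbl2;

create table studenttab(name string, age int, gpa double)
  clustered by (age) into 2 buckets
  stored as orc tblproperties('transactional'='true');

insert into table studenttab values
  ('calvin garcia',56,2.50), ('oscar miller',66,3.00),
  ('(yuri xylophone',30,2.74), ('alice underhill',46,3.50);

create table char_tbl1(name string, age int) partitioned by (gpa char(50)) stored as orc;
create table char_tbl2(name string, age int) partitioned by (gpa char(5)) stored as orc;

insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5;
insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5;
insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5;
insert into table char_tbl2 partition(gpa='3')   select name, age from studenttab where gpa = 3;

show partitions char_tbl1;
show partitions char_tbl2;

explain vectorization
select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa
from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);

-- the golden file records this select twice
select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa
from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa);

The interesting part is the join key: gpa is char(50) on one side and char(5) on the other, so the test exercises a vectorized join with dynamic partition pruning across differently padded char partition columns; only the '3.5' partitions match, yielding the single 'alice underhill' row shown below.]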
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 llap + File Output Operator [FS_10] + Merge Join Operator [MERGEJOIN_21] (rows=2 width=429) + Conds:RS_23._col2=RS_28._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_23] + PartitionCols:_col2 + Select Operator [SEL_22] (rows=2 width=237) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=2 width=237) + default@char_tbl1,c1,Tbl:COMPLETE,Col:COMPLETE,Output:["name","age"] + Dynamic Partitioning Event Operator [EVENT_26] (rows=1 width=237) + Group By Operator [GBY_25] (rows=1 width=237) + Output:["_col0"],keys:_col0 + Select Operator [SEL_24] (rows=2 width=237) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_22] + <-Map 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_28] + PartitionCols:_col2 + Select Operator [SEL_27] (rows=2 width=192) + Output:["_col0","_col1","_col2"] + TableScan [TS_3] (rows=2 width=192) + default@char_tbl2,c2,Tbl:COMPLETE,Col:COMPLETE,Output:["name","age"] + +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5 +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5 diff --git ql/src/test/results/clientpositive/llap/vector_like_2.q.out ql/src/test/results/clientpositive/llap/vector_like_2.q.out new file mode 100644 index 0000000..5c62e31 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_like_2.q.out @@ -0,0 +1,151 @@ +PREHOOK: query: drop table if exists foo +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists foo +POSTHOOK: type: DROPTABLE +PREHOOK: query: create temporary table foo (a string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo 
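[Editor's aside, not part of the patch: as with the previous file, here is a minimal sketch of the vector_like_2.q script behind this new golden file, assembled from the PREHOOK: query records in this hunk (most of which follow below); the SET line is an assumption.

-- Hypothetical reconstruction of vector_like_2.q; statements mirror the
-- PREHOOK: query records in this hunk. The SET line is assumed.
set hive.vectorized.execution.enabled=true;   -- assumption

drop table if exists foo;
create temporary table foo (a string) stored as orc;
insert into foo values("some foo"),("some bar"),(null);

explain vectorization detail
select a, a like "%bar" from foo order by a;

-- the golden file records this select twice
select a, a like "%bar" from foo order by a;

Per the plan below, the point of the test is that LIKE vectorizes to SelectStringColLikeStringScalar and that a NULL input row produces a NULL result rather than false.]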
+POSTHOOK: query: create temporary table foo (a string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: insert into foo values("some foo"),("some bar"),(null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@foo +POSTHOOK: query: insert into foo values("some foo"),("some bar"),(null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.a SCRIPT [] +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select a, a like "%bar" from foo order by a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select a, a like "%bar" from foo order by a +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: foo + Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:string, 1:ROW__ID:struct] + Select Operator + expressions: a (type: string), (a like '%bar') (type: boolean) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2] + selectExpressions: SelectStringColLikeStringScalar(col 0:string) -> 2:boolean + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) + outputColumnNames: _col0, _col1 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, a like "%bar" from foo order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select a, a like "%bar" from foo order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +NULL NULL +some bar true +some foo false +PREHOOK: query: select a, a like "%bar" from foo order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select a, a like "%bar" from foo order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +NULL NULL +some bar true +some foo false diff --git ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out new file mode 100644 index 0000000..d02fa08 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out @@ -0,0 +1,51 @@ +PREHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cint) IN (UDFToInteger(ctinyint), UDFToInteger(cbigint)) (type: boolean) + Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: FILTER operator: Vectorizing IN expression only supported for constant values + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out new file mode 100644 index 0000000..ee0e664 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out @@ -0,0 +1,274 @@ +PREHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@non_string_part +POSTHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@non_string_part +PREHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc +WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@non_string_part +POSTHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc +WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +PREHOOK: query: SHOW PARTITIONS non_string_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@non_string_part +POSTHOOK: query: SHOW PARTITIONS non_string_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@non_string_part +ctinyint=__HIVE_DEFAULT_PARTITION__ +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
alias: non_string_part + Statistics: Num rows: 3073 Data size: 24584 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 0) + predicate: (cint > 0) (type: boolean) + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int), ctinyint (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: tinyint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@non_string_part +PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_string_part +POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked 
pattern was here #### +762 NULL +762 NULL +6981 NULL +6981 NULL +6981 NULL +86028 NULL +504142 NULL +799471 NULL +1248059 NULL +1286921 NULL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: non_string_part + Statistics: Num rows: 3073 Data size: 313446 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 0) + predicate: (cint > 0) (type: boolean) + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int), cstring1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 
1020 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@non_string_part +PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_string_part +POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +762 3WsVeqb28VWEEOLI8ail +762 40ks5556SV +6981 1FNNhmiFLGw425NA13g +6981 o5mb0QP5Y48Qd4vdB0 +6981 sF2CRfgt2K +86028 T2o8XRFAL0HC4ikDQnfoCymw +504142 PlOxor04p5cvVl +799471 2fu24 +1248059 Uhps6mMh3IfHB3j7yH62K +1286921 ODLrXI8882q8LS8 diff --git ql/src/test/results/clientpositive/llap/vector_orc_string_reader_empty_dict.q.out ql/src/test/results/clientpositive/llap/vector_orc_string_reader_empty_dict.q.out new file mode 100644 index 0000000..4f00bed --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_orc_string_reader_empty_dict.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcstr +POSTHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcstr +PREHOOK: query: insert overwrite table orcstr select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: insert overwrite table orcstr select "" from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select "" from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### + +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select 
vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### + diff --git ql/src/test/results/clientpositive/llap/vector_order_null.q.out ql/src/test/results/clientpositive/llap/vector_order_null.q.out new file mode 100644 index 0000000..9ff8f87 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_order_null.q.out @@ -0,0 +1,1427 @@ +PREHOOK: query: create table src_null (a int, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_null +POSTHOOK: query: create table src_null (a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_null +PREHOOK: query: insert into src_null values (1, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (1, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b SCRIPT [] +col1 col2 +PREHOOK: query: insert into src_null values (null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [] +POSTHOOK: Lineage: src_null.b EXPRESSION [] +_col0 _col1 +PREHOOK: query: insert into src_null values (3, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (3, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b EXPRESSION [] +_col0 _col1 +PREHOOK: query: insert into src_null values (2, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b EXPRESSION [] +_col0 _col1 +PREHOOK: query: insert into src_null values (2, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b SCRIPT [] +col1 col2 +PREHOOK: query: insert into src_null values (2, 'B') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'B') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b SCRIPT [] +col1 col2 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc, b asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc, b asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc, b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* 
FROM src_null x ORDER BY a asc, b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +NULL NULL +1 A +2 NULL +2 A +2 B +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc, b asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc, b asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: -+ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: -+ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc, b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc, b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +3 NULL +2 NULL +2 A +2 B +1 A +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator 
Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 NULL +3 NULL +NULL NULL +1 A +2 A +2 B +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc, a asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc, a asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: -+ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: 
[] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: -+ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 B +1 A +2 A +NULL NULL +2 NULL +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +NULL NULL +1 A +2 NULL +2 A +2 B +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: -+ + Reduce Sink 
Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: -+ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +NULL NULL +3 NULL +2 NULL +2 A +2 B +1 A +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column 
stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +1 A +2 A +2 B +NULL NULL +2 NULL +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls 
last, a +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: -+ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: -+ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +PREHOOK: 
type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 B +1 A +2 A +NULL NULL +2 NULL +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: +- + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: zz + reduceColumnSortOrder: +- + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +1 A +2 B +2 A +2 NULL +3 NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: -- + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: zz + reduceColumnSortOrder: -- + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 B +2 A +1 A +3 NULL +2 NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 NULL +3 NULL +NULL NULL +1 A +2 A +2 B diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out new file mode 100644 index 0000000..9064e2b --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -0,0 +1,2850 @@ +PREHOOK: query: DROP TABLE IF EXISTS e011_01 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS e011_01 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS e011_02 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS e011_02 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS e011_03 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS e011_03 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE e011_01 ( + c1 decimal(15,2), + c2 decimal(15,2)) + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_01 +POSTHOOK: query: CREATE TABLE e011_01 ( + c1 decimal(15,2), + c2 decimal(15,2)) + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_01 +PREHOOK: query: CREATE TABLE e011_02 ( + c1 decimal(15,2), + c2 decimal(15,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_02 +POSTHOOK: query: CREATE TABLE e011_02 ( + c1 decimal(15,2), + c2 decimal(15,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@e011_02 +PREHOOK: query: CREATE TABLE e011_03 ( + c1 decimal(15,2), + c2 decimal(15,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_03 +POSTHOOK: query: CREATE TABLE e011_03 ( + c1 decimal(15,2), + c2 decimal(15,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_03 +PREHOOK: query: CREATE TABLE e011_01_small ( + c1 decimal(7,2), + c2 decimal(7,2)) + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_01_small +POSTHOOK: query: CREATE TABLE e011_01_small ( + c1 decimal(7,2), + c2 decimal(7,2)) + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_01_small +PREHOOK: query: CREATE TABLE e011_02_small ( + c1 decimal(7,2), + c2 decimal(7,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_02_small +POSTHOOK: query: CREATE TABLE e011_02_small ( + c1 decimal(7,2), + c2 decimal(7,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_02_small +PREHOOK: query: CREATE TABLE e011_03_small ( + c1 decimal(7,2), + c2 decimal(7,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_03_small +POSTHOOK: query: CREATE TABLE e011_03_small ( + c1 decimal(7,2), + c2 decimal(7,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_03_small +PREHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@e011_01 +POSTHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@e011_01 +PREHOOK: query: INSERT INTO TABLE e011_02 + SELECT c1, c2 + FROM e011_01 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_02 +POSTHOOK: query: INSERT INTO TABLE e011_02 + SELECT c1, c2 + FROM e011_01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_02 +POSTHOOK: Lineage: e011_02.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ] +POSTHOOK: Lineage: e011_02.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ] +c1 c2 +PREHOOK: query: INSERT INTO TABLE e011_03 + SELECT c1, c2 + FROM e011_01 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_03 +POSTHOOK: query: INSERT INTO TABLE e011_03 + SELECT c1, c2 + FROM e011_01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_03 +POSTHOOK: Lineage: e011_03.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ] +POSTHOOK: Lineage: e011_03.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ] +c1 c2 +PREHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01_small +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@e011_01_small +POSTHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01_small +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@e011_01_small +PREHOOK: query: INSERT INTO TABLE e011_02_small + 
SELECT c1, c2 + FROM e011_01_small +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Output: default@e011_02_small +POSTHOOK: query: INSERT INTO TABLE e011_02_small + SELECT c1, c2 + FROM e011_01_small +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Output: default@e011_02_small +POSTHOOK: Lineage: e011_02_small.c1 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c1, type:decimal(7,2), comment:null), ] +POSTHOOK: Lineage: e011_02_small.c2 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c2, type:decimal(7,2), comment:null), ] +c1 c2 +PREHOOK: query: INSERT INTO TABLE e011_03_small + SELECT c1, c2 + FROM e011_01_small +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Output: default@e011_03_small +POSTHOOK: query: INSERT INTO TABLE e011_03_small + SELECT c1, c2 + FROM e011_01_small +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Output: default@e011_03_small +POSTHOOK: Lineage: e011_03_small.c1 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c1, type:decimal(7,2), comment:null), ] +POSTHOOK: Lineage: e011_03_small.c2 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c2, type:decimal(7,2), comment:null), ] +c1 c2 +PREHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_01 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_01 +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_02 +PREHOOK: Output: default@e011_02 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_02 +POSTHOOK: Output: default@e011_02 +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_03 +PREHOOK: Output: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_03 +POSTHOOK: Output: default@e011_03 +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Output: default@e011_01_small +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Output: default@e011_01_small +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_02_small +PREHOOK: Output: default@e011_02_small +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_02_small +POSTHOOK: Output: default@e011_02_small +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_03_small +PREHOOK: Output: default@e011_03_small +#### A masked pattern was here #### 
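All six e011 tables are analyzed before the windowing plans are captured; this is what lets every operator below report "Basic stats: COMPLETE Column stats: COMPLETE" rather than estimated statistics. A minimal sketch of the pattern, with t standing in as a hypothetical table:

    -- Gather per-column stats (NDV, null counts, min/max) on top of basic stats;
    -- subsequent EXPLAIN output should then show 'Column stats: COMPLETE'.
    ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS;
    EXPLAIN SELECT sum(c1) FROM t;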
+POSTHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_03_small +POSTHOOK: Output: default@e011_03_small +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Select Operator + expressions: c1 (type: decimal(15,2)) + outputColumnNames: c1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(15,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(25,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By 
Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(25,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + keyExpressions: ConstantVectorExpression(val 0) -> 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(35,2), bigint] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col0 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 1:decimal(25,2)] + functionNames: [sum] + keyInputColumns: [] + native: true + nonKeyInputColumns: [1] + orderExpressions: [ConstantVectorExpression(val 0) -> 3:int] + outputColumns: [2, 1] + outputTypes: [decimal(35,2), decimal(25,2)] + streamingColumns: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
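The plan above shows how the nested aggregate select sum(sum(c1)) over() executes: Reducer 2 finishes the inner sum as a GLOBAL group-by and feeds a single row into Reducer 3, where the empty OVER() clause is implemented as a PTF partitioned and ordered by the constant key 0 (ConstantVectorExpression(val 0)). Note also the decimal widening at each step: summing decimal(15,2) produces decimal(25,2), and the windowed sum widens again to decimal(35,2), consistent with Hive adding 10 to the precision, capped at 38. A sketch of the equivalent two-level formulation, under the assumption that the optimizer treats both forms alike:

    -- inner aggregate first, then the window over its single-row result
    SELECT sum(s) OVER ()                       -- decimal(25,2) -> decimal(35,2)
    FROM (SELECT sum(c1) AS s FROM e011_01) q;  -- decimal(15,2) -> decimal(25,2)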
+ Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over() from e011_01 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over() from e011_01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +#### A masked pattern was here #### +_c0 +16.00 +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: c1, c2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(15,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), 
c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(15,2), KEY._col1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(25,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(15,2), KEY.reducesinkkey1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(35,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: 
[col 2:decimal(25,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(15,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(35,2), decimal(15,2), decimal(15,2), decimal(25,2)] + partitionExpressions: [col 0:decimal(15,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: 
decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: e011_03 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(15,2)) + 1 _col0 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(15,2), KEY._col1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(25,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(15,2), KEY.reducesinkkey1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(35,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input 
alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(25,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(15,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(35,2), decimal(15,2), decimal(15,2), decimal(25,2)] + partitionExpressions: [col 0:decimal(15,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Input: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Input: default@e011_03 +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_03 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + 
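The join plan above and the variant whose EXPLAIN begins just before this note are the "outer reference" cases the file is named for: they differ only in which side of the join supplies the grouping and window keys (e011_01 vs e011_03), and both compile to the same shape, a Merge Join reducer that is not marked vectorized (Reducer 2), a vectorized MERGE_PARTIAL group-by (Reducer 3), and a vectorized PTF (Reducer 4). A stripped-down sketch of the pattern, with hypothetical tables t1 and t2:

    -- window keys referencing the other join input than the aggregated column
    SELECT sum(sum(t1.x)) OVER (PARTITION BY t2.y ORDER BY t2.x)
    FROM t1 JOIN t2 ON t1.x = t2.x
    GROUP BY t2.x, t2.y;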
TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(15,2)) + 1 _col0 (type: decimal(15,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(15,2), KEY._col1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(25,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(15,2), KEY.reducesinkkey1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(35,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(25,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(15,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(35,2), decimal(15,2), decimal(15,2), decimal(25,2)] + partitionExpressions: [col 0:decimal(15,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Input: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Input: default@e011_03 +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain 
vectorization detail +select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: e011_03 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(15,2)) + 1 _col0 (type: decimal(15,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 1792 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: corr(_col0, _col2) + keys: _col1 (type: decimal(15,2)), _col3 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF corr not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: corr(VALUE._col0) + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window 
function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Input: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Input: default@e011_03 +#### A masked pattern was here #### +sum_window_0 +NULL +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01_small +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01_small +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Select Operator + expressions: c1 (type: decimal(7,2)) + outputColumnNames: c1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(7,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] 
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + keyExpressions: ConstantVectorExpression(val 0) -> 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(17,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(27,2), bigint] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + 
arguments: _col0 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 1:decimal(17,2)] + functionNames: [sum] + keyInputColumns: [] + native: true + nonKeyInputColumns: [1] + orderExpressions: [ConstantVectorExpression(val 0) -> 3:int] + outputColumns: [2, 1] + outputTypes: [decimal(27,2), decimal(17,2)] + streamingColumns: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(27,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over() from e011_01_small +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over() from e011_01_small +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +#### A masked pattern was here #### +_c0 +16.00 +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01_small + group by e011_01_small.c1, e011_01_small.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01_small + group by e011_01_small.c1, e011_01_small.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: c1, c2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(7,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(27,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(17,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(7,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(27,2), decimal(7,2), decimal(7,2), decimal(17,2)] + partitionExpressions: [col 0:decimal(7,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(27,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01_small + group by e011_01_small.c1, e011_01_small.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01_small + group by e011_01_small.c1, e011_01_small.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01_small.c1)) over( + partition by e011_01_small.c2 order by e011_01_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_01_small.c1, e011_01_small.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01_small.c1)) over( + partition by e011_01_small.c2 order by e011_01_small.c1) + from e011_01_small + join e011_03_small on 
e011_01_small.c1 = e011_03_small.c1 + group by e011_01_small.c1, e011_01_small.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: e011_03_small + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(7,2)) + 1 _col0 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + 
native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(27,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(17,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(7,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(27,2), decimal(7,2), decimal(7,2), decimal(17,2)] + partitionExpressions: [col 0:decimal(7,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(27,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01_small.c1)) over( + partition by e011_01_small.c2 order by e011_01_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_01_small.c1, e011_01_small.c2 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@e011_01_small +PREHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01_small.c1)) over( + partition by e011_01_small.c2 order by e011_01_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_01_small.c1, e011_01_small.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01_small.c1)) over( + partition by e011_03_small.c2 order by e011_03_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c1, e011_03_small.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01_small.c1)) over( + partition by e011_03_small.c2 order by e011_03_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c1, e011_03_small.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_03_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(7,2)) + 1 _col0 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: 
KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(27,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(17,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(7,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(27,2), decimal(7,2), decimal(7,2), decimal(17,2)] + partitionExpressions: [col 0:decimal(7,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: 
decimal(27,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01_small.c1)) over( + partition by e011_03_small.c2 order by e011_03_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c1, e011_03_small.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01_small.c1)) over( + partition by e011_03_small.c2 order by e011_03_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c1, e011_03_small.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(corr(e011_01_small.c1, e011_03_small.c1)) + over(partition by e011_01_small.c2 order by e011_03_small.c2) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c2, e011_01_small.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(corr(e011_01_small.c1, e011_03_small.c1)) + over(partition by e011_01_small.c2 order by e011_03_small.c2) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c2, e011_01_small.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + 
Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: e011_03_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(7,2)) + 1 _col0 (type: decimal(7,2)) + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 1792 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: corr(_col0, _col2) + keys: _col1 (type: decimal(7,2)), _col3 (type: decimal(7,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF corr not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: corr(VALUE._col0) + keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)), _col2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(corr(e011_01_small.c1, e011_03_small.c1)) + over(partition by e011_01_small.c2 order by e011_03_small.c2) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c2, e011_01_small.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(corr(e011_01_small.c1, e011_03_small.c1)) + over(partition by e011_01_small.c2 order by e011_03_small.c2) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c2, e011_01_small.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +sum_window_0 +NULL 
+NULL +NULL +NULL diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index 5eaed53..16b59e6 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -6105,6 +6105,152 @@ Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 4 4 Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 5 5 Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 1464.48 6 6 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col1 (type: double) + 
outputColumnNames: _col0, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 40 Data size: 14936 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 0:string] + functionNames: [rank] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1] + orderExpressions: [col 0:string] + outputColumns: [2, 0, 1] + outputTypes: [int, string, double] + streamingColumns: [2] + Statistics: Num rows: 40 Data size: 14936 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_retailprice, rank() over(partition by p_mfgr) as r from vector_ptf_part_simple_orc @@ -6158,112 +6304,525 @@ Manufacturer#3 1922.98 1 Manufacturer#3 1190.27 1 Manufacturer#3 NULL 1 Manufacturer#3 99.68 1 -PREHOOK: query: select p_mfgr, p_retailprice, +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, rank() over(partition by p_mfgr order by p_name) as r from vector_ptf_part_simple_orc PREHOOK: type: QUERY -PREHOOK: Input: default@vector_ptf_part_simple_orc -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, rank() over(partition by p_mfgr order by p_name) as r from vector_ptf_part_simple_orc POSTHOOK: type: QUERY -POSTHOOK: Input: default@vector_ptf_part_simple_orc -#### A masked pattern was here #### -p_mfgr p_retailprice r -Manufacturer#1 1173.15 1 -Manufacturer#1 1173.15 1 -Manufacturer#1 1753.76 3 -Manufacturer#1 1753.76 3 -Manufacturer#1 1753.76 3 -Manufacturer#1 1753.76 3 -Manufacturer#1 1602.59 7 -Manufacturer#1 1414.42 8 -Manufacturer#1 1632.66 9 -Manufacturer#1 NULL 9 -Manufacturer#1 1632.66 9 -Manufacturer#1 1632.66 9 -Manufacturer#2 1690.68 1 -Manufacturer#2 1800.7 2 -Manufacturer#2 1800.7 2 -Manufacturer#2 1800.7 2 -Manufacturer#2 2031.98 5 -Manufacturer#2 900.66 6 -Manufacturer#2 1698.66 6 -Manufacturer#2 1000.6 8 -Manufacturer#3 99.68 1 -Manufacturer#3 590.27 2 -Manufacturer#3 NULL 2 -Manufacturer#3 1190.27 2 
-Manufacturer#3 1190.27 2 -Manufacturer#3 55.39 6 -Manufacturer#3 1922.98 7 -Manufacturer#3 1337.29 8 -Manufacturer#4 NULL 1 -Manufacturer#4 1375.42 2 -Manufacturer#4 NULL 3 -Manufacturer#4 1206.26 3 -Manufacturer#4 1844.92 5 -Manufacturer#4 1290.35 6 -Manufacturer#5 1789.69 1 -Manufacturer#5 1611.66 2 -Manufacturer#5 1611.66 2 -Manufacturer#5 1788.73 4 -Manufacturer#5 1018.1 5 -Manufacturer#5 1464.48 6 -PREHOOK: query: select p_mfgr, p_name, p_retailprice, -rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r -from vector_ptf_part_simple_orc -PREHOOK: type: QUERY -PREHOOK: Input: default@vector_ptf_part_simple_orc +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_retailprice, -rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r -from vector_ptf_part_simple_orc -POSTHOOK: type: QUERY -POSTHOOK: Input: default@vector_ptf_part_simple_orc + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### -p_mfgr p_name p_retailprice r -Manufacturer#1 almond antique burnished rose metallic 1173.15 1 -Manufacturer#1 almond antique burnished rose metallic 1173.15 1 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 -Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 -Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 -Manufacturer#1 almond aquamarine pink moccasin thistle NULL 9 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 -Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 -Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 -Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 -Manufacturer#2 almond aquamarine rose maroon antique 1698.66 6 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 -Manufacturer#3 almond antique chartreuse khaki white 99.68 1 -Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 -Manufacturer#3 almond antique forest lavender goldenrod NULL 2 -Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 -Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 -Manufacturer#3 almond antique metallic orange dim 55.39 6 -Manufacturer#3 almond antique misty red olive 1922.98 7 -Manufacturer#3 almond antique olive coral navajo 1337.29 8 -Manufacturer#4 almond antique gainsboro frosted violet NULL 1 -Manufacturer#4 almond antique violet mint lemon 1375.42 2 -Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 -Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 3 -Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 -Manufacturer#4 almond azure 
aquamarine papaya violet 1290.35 6 + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [2] + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1:string] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string] + outputColumns: [3, 0, 1, 2] + outputTypes: [int, string, string, double] + partitionExpressions: [col 0:string] + streamingColumns: [3] + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE 
Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3] + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#1 1173.15 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1602.59 7 +Manufacturer#1 1414.42 8 +Manufacturer#1 1632.66 9 +Manufacturer#1 NULL 9 +Manufacturer#1 1632.66 9 +Manufacturer#1 1632.66 9 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 2031.98 5 +Manufacturer#2 900.66 6 +Manufacturer#2 1698.66 6 +Manufacturer#2 1000.6 8 +Manufacturer#3 99.68 1 +Manufacturer#3 590.27 2 +Manufacturer#3 NULL 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 55.39 6 +Manufacturer#3 1922.98 7 +Manufacturer#3 1337.29 8 +Manufacturer#4 NULL 1 +Manufacturer#4 1375.42 2 +Manufacturer#4 NULL 3 +Manufacturer#4 1206.26 3 +Manufacturer#4 1844.92 5 +Manufacturer#4 1290.35 6 +Manufacturer#5 1789.69 1 +Manufacturer#5 1611.66 2 +Manufacturer#5 1611.66 2 +Manufacturer#5 1788.73 4 +Manufacturer#5 1018.1 5 +Manufacturer#5 1464.48 6 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + 
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] + Reduce Output Operator + key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 6, 1] + keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0, 9] + valueColumnNums: [2] + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp, timestamp, bigint, timestamp, timestamp] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:timestamp, KEY.reducesinkkey2:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, timestamp, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3] + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true 
+ PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 2:string] + functionNames: [rank] + keyInputColumns: [0, 2] + native: true + nonKeyInputColumns: [3] + orderExpressions: [col 2:string] + outputColumns: [4, 0, 2, 3] + outputTypes: [int, string, string, double] + partitionExpressions: [col 0:string, IfExprColumnNull(col 5:boolean, col 6:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 5:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 6:timestamp) -> 7:timestamp] + streamingColumns: [4] + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3, 4] + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 6 
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 +Manufacturer#3 almond antique forest lavender goldenrod NULL 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique metallic orange dim 55.39 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 3 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 Manufacturer#5 almond antique blue firebrick mint 1789.69 1 Manufacturer#5 almond antique medium spring khaki 1611.66 2 Manufacturer#5 almond antique medium spring khaki 1611.66 2 Manufacturer#5 almond antique sky peru orange 1788.73 4 Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] + Reduce Output Operator + key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 6] + keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 2] + Statistics: Num 
rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp, timestamp] + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END ASC NULLS FIRST + partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_name, p_retailprice, rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r from vector_ptf_part_simple_orc diff --git ql/src/test/results/clientpositive/vector_string_decimal.q.out ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out similarity index 56% rename from ql/src/test/results/clientpositive/vector_string_decimal.q.out rename to ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out index 59b0588..d792c46 100644 --- 
ql/src/test/results/clientpositive/vector_string_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out @@ -56,48 +56,52 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: orc_decimal - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterDoubleColumnInList(col 2:double, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 0:decimal(18,0)) -> 2:double) - predicate: (UDFToDouble(id)) IN (1.0E8, 2.0E8) (type: boolean) - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: id (type: decimal(18,0)) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_decimal + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDoubleColumnInList(col 2:double, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 0:decimal(18,0)) -> 2:double) + predicate: (UDFToDouble(id)) IN (1.0E8, 2.0E8) (type: boolean) + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: decimal(18,0)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git 
ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out new file mode 100644 index 0000000..4429e9a --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out @@ -0,0 +1,400 @@ +PREHOOK: query: explain vectorization detail +select 'key1', 'value1' from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select 'key1', 'value1' from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Row Limit Per Split: 1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: 'key1' (type: string), 'value1' (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 14] + selectExpressions: ConstantVectorExpression(val key1) -> 13:string, ConstantVectorExpression(val value1) -> 14:string + Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [string, string] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +_c0 _c1 +key1 value1 +PREHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@decimal_2 +POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: explain vectorization detail +insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Row Limit Per Split: 1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: 17.29 (type: decimal(18,9)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: ConstantVectorExpression(val 17.29) -> 13:decimal(18,9) + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.decimal_2 + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(18,9)] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.decimal_2 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@decimal_2 +POSTHOOK: 
query: insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@decimal_2 +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +_col0 +PREHOOK: query: select count(*) from decimal_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from decimal_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +_c0 +1 +PREHOOK: query: drop table decimal_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_2 +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: drop table decimal_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_2 +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: explain vectorization detail +select count(1) from (select * from (Select 1 a) x order by x.a) y +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select count(1) from (select * from (Select 1 a) x order by x.a) y +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false +#### A masked pattern was here #### + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +_c0 +1 +PREHOOK: query: explain vectorization detail +create temporary table dual as select 1 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain vectorization detail +create temporary table dual as select 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dual + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false +#### A masked pattern was here #### + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: _c0 int + input format: org.apache.hadoop.mapred.TextInputFormat +#### A masked pattern was here #### + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dual + isTemporary: true + + Stage: Stage-3 + Stats Work + Basic Stats Work: + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create temporary table dual as select 1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@dual +POSTHOOK: query: create temporary table dual as select 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dual +_c0 +PREHOOK: query: select * from dual +PREHOOK: type: QUERY +PREHOOK: Input: default@dual +#### A masked pattern was here #### +POSTHOOK: query: select * from dual +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dual +#### A masked pattern was here #### +dual._c0 +1 diff --git ql/src/test/results/clientpositive/llap/vector_udf2.q.out ql/src/test/results/clientpositive/llap/vector_udf2.q.out new file mode 100644 index 0000000..8e3ccc9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_udf2.q.out @@ -0,0 +1,330 @@ +PREHOOK: query: drop table varchar_udf_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_udf_2 +POSTHOOK: type: DROPTABLE +PREHOOK: 
query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_udf_2 +POSTHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_udf_2 +PREHOOK: query: insert overwrite table varchar_udf_2 + select key, value, key, value from src where key = '238' limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar_udf_2 +POSTHOOK: query: insert overwrite table varchar_udf_2 + select key, value, key, value from src where key = '238' limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar_udf_2 +POSTHOOK: Lineage: varchar_udf_2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_2.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain vectorization expression +select + c1 LIKE '%38%', + c2 LIKE 'val_%', + c3 LIKE '%38', + c1 LIKE '%3x8%', + c2 LIKE 'xval_%', + c3 LIKE '%x38' +from varchar_udf_2 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + c1 LIKE '%38%', + c2 LIKE 'val_%', + c3 LIKE '%38', + c1 LIKE '%3x8%', + c2 LIKE 'xval_%', + c3 LIKE '%x38' +from varchar_udf_2 limit 1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: varchar_udf_2 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Select Operator + expressions: (c1 like '%38%') (type: boolean), (c2 like 'val_%') (type: boolean), (c3 like '%38') (type: boolean), (c1 like '%3x8%') (type: boolean), (c2 like 'xval_%') (type: boolean), (c3 like '%x38') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10] + selectExpressions: SelectStringColLikeStringScalar(col 0:string) -> 5:boolean, SelectStringColLikeStringScalar(col 1:string) -> 6:boolean, SelectStringColLikeStringScalar(col 2:varchar(10)) -> 7:boolean, SelectStringColLikeStringScalar(col 0:string) -> 8:boolean, SelectStringColLikeStringScalar(col 1:string) -> 9:boolean, SelectStringColLikeStringScalar(col 2:varchar(10)) -> 10:boolean + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select + c1 LIKE '%38%', + c2 LIKE 'val_%', + c3 LIKE '%38', + c1 LIKE '%3x8%', + c2 LIKE 'xval_%', + c3 LIKE '%x38' +from varchar_udf_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_2 +#### A masked pattern was here #### +POSTHOOK: query: select + c1 LIKE '%38%', + c2 LIKE 'val_%', + c3 LIKE '%38', + c1 LIKE '%3x8%', + c2 LIKE 'xval_%', + c3 LIKE '%x38' +from varchar_udf_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_2 +#### A masked pattern was here #### +true true true false false false +PREHOOK: query: drop table varchar_udf_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_udf_2 +PREHOOK: Output: default@varchar_udf_2 +POSTHOOK: query: drop table varchar_udf_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_udf_2 +POSTHOOK: Output: default@varchar_udf_2 +PREHOOK: query: create temporary table HIVE_14349 (a string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@HIVE_14349 +POSTHOOK: query: create temporary table HIVE_14349 (a string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@HIVE_14349 +PREHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive_14349 +POSTHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hive_14349 +POSTHOOK: Lineage: hive_14349.a SCRIPT [] +PREHOOK: query: explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a%' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a%' +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hive_14349 + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringColLikeStringScalar(col 0:string, pattern XYZ%a%) + predicate: (a like 'XYZ%a%') (type: boolean) + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: 
false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%' +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_14349 +#### A masked pattern was here #### +POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_14349 +#### A masked pattern was here #### +XYZa +PREHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive_14349 +POSTHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hive_14349 +POSTHOOK: Lineage: hive_14349.a SCRIPT [] +PREHOOK: query: explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a_' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a_' +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hive_14349 + Statistics: Num rows: 6 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringColLikeStringScalar(col 0:string, pattern XYZ%a_) + predicate: (a like 'XYZ%a_') (type: boolean) + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_' +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_14349 +#### A masked pattern was here #### +POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_14349 +#### A masked pattern was here #### +XYZab +PREHOOK: query: drop table HIVE_14349 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hive_14349 +PREHOOK: Output: default@hive_14349 +POSTHOOK: query: drop table HIVE_14349 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hive_14349 +POSTHOOK: Output: default@hive_14349 diff --git ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out similarity index 72% rename from ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out rename to ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out index 1761b5a..647fcb7 100644 --- ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out @@ -124,33 +124,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), UDFToBoolean(s) (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 12 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s (type: string), UDFToBoolean(s) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1116 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorization_div0.q.out ql/src/test/results/clientpositive/llap/vectorization_div0.q.out new file mode 100644 index 0000000..1abfa95 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorization_div0.q.out @@ -0,0 +1,831 @@ +PREHOOK: query: explain vectorization expression +select cint / 0, ctinyint / 0, cbigint / 0, cdouble / 0.0 from alltypesorc limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select cint / 0, ctinyint / 0, cbigint / 0, cdouble / 0.0 from alltypesorc limit 100 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 220184 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Select Operator + expressions: (UDFToDouble(cint) / 0.0) (type: double), (UDFToDouble(ctinyint) / 0.0) (type: double), (UDFToDouble(cbigint) / 0.0) (type: double), (cdouble / 0.0) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [14, 15, 16, 13] + selectExpressions: DoubleColDivideDoubleScalar(col 13:double, val 0.0)(children: CastLongToDouble(col 2:int) -> 13:double) -> 14:double, DoubleColDivideDoubleScalar(col 13:double, val 0.0)(children: CastLongToDouble(col 0:tinyint) -> 13:double) -> 15:double, DoubleColDivideDoubleScalar(col 13:double, val 0.0)(children: CastLongToDouble(col 3:bigint) -> 13:double) -> 16:double, DoubleColDivideDoubleScalar(col 5:double, val 0.0) -> 13:double + Statistics: Num rows: 12288 Data size: 393216 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select cint / 0, ctinyint / 0, cbigint / 0, cdouble / 0.0 from alltypesorc limit 100 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select cint / 0, ctinyint / 0, cbigint / 0, cdouble / 0.0 from alltypesorc limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: explain vectorization expression +select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) +from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) +from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + 
predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColLessLongScalar(col 3:bigint, val 100000000)) + predicate: ((cbigint < 100000000) and (cbigint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 16, 18] + selectExpressions: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 13:bigint, DoubleColDivideDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 15:double) -> 16:double, DecimalScalarDivideDecimalColumn(val 1.2, col 17:decimal(19,0))(children: CastLongToDecimal(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 17:decimal(19,0)) -> 18:decimal(22,21) + Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(22,21)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) +from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) +from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-985319 NULL -0.000001217879691754650 +-985319 2.0297994862577501E-4 -0.000001217879691754650 +-63925 0.11256941728588189 -0.000018771998435666797 +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +392309 NULL 0.000003058813333367320 +673083 -0.010691103474608629 0.000001782841046349410 +2331159 NULL 0.000000514765402102559 +2342037 NULL 0.000000512374484263058 +3533105 -5.660743170667161E-5 0.000000339644590240030 +3768727 0.004139594085748318 0.000000318409903397089 +4728619 NULL 0.000000253773881972728 +5391403 NULL 0.000000222576572369010 +7022666 -0.0010246820794268159 0.000000170875277280736 +7470430 NULL 0.000000160633323650714 +8276429 NULL 0.000000144990067576246 +8286860 -8.683626850218298E-4 0.000000144807562816314 +8299981 -8.669899364829872E-4 0.000000144578644216174 +9247593 NULL 0.000000129763496295739 +9821695 -7.326637611939691E-4 0.000000122178503812224 +10000738 0.001559984873116364 0.000000119991144653525 +10081828 0.0015474376273826532 0.000000119026033770860 +10745355 -6.696847149303117E-4 0.000000111676161466978 +11127199 -1.797397530142132E-5 0.000000107843851808528 +11722580 NULL 0.000000102366543883684 +12649396 NULL 0.000000094866189658384 +13126214 -1.5236685917203544E-5 0.000000091420115503221 +14042667 NULL 0.000000085453852889910 +14943972 -1.3383322720358416E-5 0.000000080299936322150 +16259022 NULL 0.000000073805177211766 +16531556 -1.2098074736582569E-5 0.000000072588448419495 +16596157 NULL 0.000000072305895876979 +17058489 -1.1724367849930905E-5 0.000000070346207099585 +17247320 -4.172242412154468E-4 0.000000069576026884177 +19004427 8.209139901981786E-4 0.000000063143182375349 +19498517 NULL 0.000000061543141973310 +20165679 7.736411950224934E-4 0.000000059507046601307 +20547875 NULL 0.000000058400199534015 +23264783 NULL 0.000000051580107151655 +23475527 6.645644206411213E-4 0.000000051117063314489 +24379905 NULL 0.000000049220864478348 +24514624 -2.935390728407664E-4 0.000000048950373458716 +25154198 -2.860755091456305E-4 0.000000047705754721339 +25245192 -7.922300610745999E-6 0.000000047533803664476 +26610943 NULL 0.000000045094230595286 +27520143 5.668938566198584E-4 0.000000043604424584567 +27818379 NULL 0.000000043136949137115 +28400244 NULL 0.000000042253158106670 +28698999 5.43607810153936E-4 0.000000041813305056389 +28806400 -6.9429015774272385E-6 0.000000041657409464563 +29920877 5.214085135271938E-4 0.000000040105776311303 +33126539 NULL 0.000000036224732079617 +34603086 NULL 0.000000034678987879867 +35156265 NULL 0.000000034133318769784 +35862260 NULL 0.000000033461360215447 +36123797 -1.992038655294182E-4 0.000000033219099310075 +36341671 -1.980096072082101E-4 0.000000033019945615599 +36413215 -5.4925114412446145E-6 0.000000032955068647468 +36578596 
4.2650625518814335E-4 0.000000032806070522772 +36796441 -1.955623914823719E-4 0.000000032611849607955 +39723587 NULL 0.000000030208752296211 +39985709 -1.7996429674411925E-4 0.000000030010722080731 +40018606 NULL 0.000000029986051987918 +41003161 NULL 0.000000029266036342905 +41158231 3.790493328053871E-4 0.000000029155772025285 +41848817 NULL 0.000000028674645689507 +44047567 -1.633688416888043E-4 0.000000027243275434487 +45125678 NULL 0.000000026592398234992 +45180154 NULL 0.000000026560334433566 +45717793 3.4124569399052136E-4 0.000000026247986205283 +46163162 NULL 0.000000025994753132379 +46525838 3.353190543284787E-4 0.000000025792120068853 +48626663 NULL 0.000000024677819244969 +49102701 -1.465499830650864E-4 0.000000024438574163161 +50300445 -1.4306036457530346E-4 0.000000023856647789100 +50929325 -1.412938420055636E-4 0.000000023562063702984 +52422534 -1.3726921327381848E-4 0.000000022890919389742 +52667422 2.9621727070673783E-4 0.000000022784483356713 +52962061 2.945693522010029E-4 0.000000022657728520044 +53695172 NULL 0.000000022348377988248 +54760317 NULL 0.000000021913678841560 +55020655 2.835480602693661E-4 0.000000021809991175132 +56102034 NULL 0.000000021389598815615 +56131313 NULL 0.000000021378441655195 +56838351 -3.5187509222426247E-6 0.000000021112505533456 +56997841 -3.5089048372902406E-6 0.000000021053429023741 +57778807 -1.2454393528755274E-4 0.000000020768860803928 +58080381 NULL 0.000000020661021490200 +58307527 NULL 0.000000020580533281749 +58536385 -1.2293208745295768E-4 0.000000020500070170032 +59347745 NULL 0.000000020219807846111 +60229567 NULL 0.000000019923769334088 +60330397 NULL 0.000000019890470801974 +PREHOOK: query: explain vectorization expression +select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) +from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) +from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -500.0), FilterDoubleColLessDoubleScalar(col 5:double, val -199.0)) + predicate: ((cdouble < -199.0) and (cdouble >= -500.0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: 
double), (1.2 / (cdouble + 200.0)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 16, 17, 15, 18] + selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 16:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 17:double, DoubleScalarDivideDoubleColumn(val 3.0, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 15:double, DoubleScalarDivideDoubleColumn(val 1.2, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 18:double + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 1, 3, 4] + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator 
+ limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) +from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) +from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-292.0 NULL 1.0 NULL -0.010273972602739725 -0.00410958904109589 +-290.0 NULL 1.0 NULL -0.010344827586206896 -0.004137931034482759 +-289.0 NULL 1.0 NULL -0.010380622837370242 -0.004152249134948096 +-281.0 NULL 1.0 NULL -0.010676156583629894 -0.004270462633451957 +-279.0 NULL 1.0 NULL -0.010752688172043012 -0.004301075268817204 +-274.0 6888911.518248175 1.0 6888911.518248175 -0.010948905109489052 -0.00437956204379562 +-273.0 6028764.868131869 1.0 6028764.868131869 -0.01098901098901099 -0.004395604395604396 +-257.0 6404096.53307393 1.0 6404096.53307393 -0.011673151750972763 -0.004669260700389105 +-250.0 6583411.236 1.0 6583411.236 -0.012 -0.0048 +-247.0 NULL 1.0 NULL -0.012145748987854251 -0.004858299595141701 +-247.0 -7546669.174089069 1.0 -7546669.174089069 -0.012145748987854251 -0.004858299595141701 +-246.0 NULL 1.0 NULL -0.012195121951219513 -0.004878048780487805 +-237.0 NULL 1.0 NULL -0.012658227848101266 -0.005063291139240506 +-236.0 NULL 1.0 NULL -0.012711864406779662 -0.005084745762711864 +-229.0 7187130.170305677 1.0 7187130.170305677 -0.013100436681222707 -0.005240174672489083 +-228.0 8278779.631578947 1.0 8278779.631578947 -0.013157894736842105 -0.005263157894736842 +-225.0 NULL 1.0 NULL -0.013333333333333334 -0.005333333333333333 +-210.0 -8876320.40952381 1.0 -8876320.40952381 -0.014285714285714285 -0.005714285714285714 +-201.0 NULL 1.0 NULL -0.014925373134328358 -0.005970149253731343 +-199.0 NULL 1.0 NULL -0.01507537688442211 -0.006030150753768844 +-189.0 NULL 1.0 NULL -0.015873015873015872 -0.006349206349206349 +-188.0 NULL 1.0 NULL -0.015957446808510637 -0.006382978723404255 +-184.0 8944852.222826088 1.0 8944852.222826088 -0.016304347826086956 -0.006521739130434782 +-183.0 8993731.196721312 1.0 8993731.196721312 -0.01639344262295082 -0.006557377049180328 +-181.0 NULL 1.0 NULL -0.016574585635359115 -0.0066298342541436465 +-179.0 NULL 1.0 NULL -0.01675977653631285 -0.0067039106145251395 +-169.0 9738774.01775148 1.0 9738774.01775148 -0.01775147928994083 -0.007100591715976331 +-164.0 NULL 1.0 NULL -0.018292682926829267 -0.007317073170731707 +-161.0 NULL 1.0 NULL -0.018633540372670808 -0.007453416149068323 +-154.0 1.2256894519480519E7 1.0 1.2256894519480519E7 -0.01948051948051948 -0.007792207792207792 +-152.0 NULL 1.0 NULL -0.019736842105263157 -0.007894736842105263 +-148.0 NULL 1.0 NULL -0.02027027027027027 -0.008108108108108109 +-140.0 NULL 1.0 NULL -0.02142857142857143 -0.008571428571428572 +-138.0 NULL 1.0 NULL -0.021739130434782608 -0.008695652173913044 +-137.0 NULL 1.0 NULL -0.021897810218978103 -0.00875912408759124 +-132.0 NULL 1.0 NULL -0.022727272727272728 -0.00909090909090909 +-129.0 1.2758548906976745E7 1.0 1.2758548906976745E7 -0.023255813953488372 -0.009302325581395349 +-128.0 
NULL 1.0 NULL -0.0234375 -0.009375 +-126.0 NULL 1.0 NULL -0.023809523809523808 -0.009523809523809523 +-126.0 -1.4793867349206349E7 1.0 -1.4793867349206349E7 -0.023809523809523808 -0.009523809523809523 +-116.0 NULL 1.0 NULL -0.02586206896551724 -0.010344827586206896 +-113.0 NULL 1.0 NULL -0.02654867256637168 -0.010619469026548672 +-113.0 -1.6495816690265486E7 1.0 -1.6495816690265486E7 -0.02654867256637168 -0.010619469026548672 +-96.0 NULL 1.0 NULL -0.03125 -0.012499999999999999 +-94.0 -1.9830077510638297E7 1.0 -1.9830077510638297E7 -0.031914893617021274 -0.01276595744680851 +-93.0 NULL 1.0 NULL -0.03225806451612903 -0.012903225806451613 +-77.0 2.4513789038961038E7 1.0 2.4513789038961038E7 -0.03896103896103896 -0.015584415584415584 +-69.0 2.735596747826087E7 1.0 2.735596747826087E7 -0.043478260869565216 -0.017391304347826087 +-62.0 NULL 1.0 NULL -0.04838709677419355 -0.01935483870967742 +-62.0 3.0444544451612905E7 1.0 3.0444544451612905E7 -0.04838709677419355 -0.01935483870967742 +-60.0 NULL 1.0 NULL -0.05 -0.02 +-57.0 -3.27022330877193E7 1.0 -3.27022330877193E7 -0.05263157894736842 -0.021052631578947368 +-49.0 3.35888328367347E7 1.0 3.35888328367347E7 -0.061224489795918366 -0.024489795918367346 +-46.0 3.577940889130435E7 1.0 3.577940889130435E7 -0.06521739130434782 -0.02608695652173913 +-38.0 4.3311916026315786E7 1.0 4.3311916026315786E7 -0.07894736842105263 -0.031578947368421054 +-28.0 5.878045746428572E7 1.0 5.878045746428572E7 -0.10714285714285714 -0.04285714285714286 +-28.0 6.741291985714285E7 1.0 6.741291985714285E7 -0.10714285714285714 -0.04285714285714286 +-21.0 8.988389314285715E7 1.0 8.988389314285715E7 -0.14285714285714285 -0.05714285714285714 +-20.0 NULL 1.0 NULL -0.15 -0.06 +-17.0 NULL 1.0 NULL -0.17647058823529413 -0.07058823529411765 +-12.0 -1.5533560716666666E8 1.0 -1.5533560716666666E8 -0.25 -0.09999999999999999 +-3.0 NULL 1.0 NULL -1.0 -0.39999999999999997 +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +PREHOOK: query: explain vectorization expression +select cint, cbigint, ctinyint, (cint / (cint - 528534767)) as c1, (cbigint / (cbigint - 1018195815)) as c2, (ctinyint / ctinyint) as c3, (cint % (cint - 528534767)) as c4, (cbigint % (cbigint - 1018195815)), (ctinyint % ctinyint) as c3 +from alltypesorc where cint > 500000000 or cdouble > 1000000000 or ctinyint = 0 order by c1, c2 limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select cint, cbigint, ctinyint, (cint / 
(cint - 528534767)) as c1, (cbigint / (cbigint - 1018195815)) as c2, (ctinyint / ctinyint) as c3, (cint % (cint - 528534767)) as c4, (cbigint % (cbigint - 1018195815)), (ctinyint % ctinyint) as c3 +from alltypesorc where cint > 500000000 or cdouble > 1000000000 or ctinyint = 0 order by c1, c2 limit 100 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 220184 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 2:int, val 500000000), FilterDoubleColGreaterDoubleScalar(col 5:double, val 1.0E9), FilterLongColEqualLongScalar(col 0:tinyint, val 0)) + predicate: ((cdouble > 1.0E9) or (cint > 500000000) or (ctinyint = 0)) (type: boolean) + Statistics: Num rows: 4193 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int), cbigint (type: bigint), ctinyint (type: tinyint), (cint / (cint - 528534767)) (type: double), (cbigint / (cbigint - 1018195815)) (type: double), (ctinyint / ctinyint) (type: double), (cint % (cint - 528534767)) (type: int), (cbigint % (cbigint - 1018195815)) (type: bigint), (ctinyint % ctinyint) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 0, 14, 15, 16, 17, 18, 13] + selectExpressions: LongColDivideLongColumn(col 2:int, col 13:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 13:int) -> 14:double, LongColDivideLongColumn(col 3:bigint, col 13:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 13:bigint) -> 15:double, LongColDivideLongColumn(col 0:tinyint, col 0:tinyint) -> 16:double, LongColModuloLongColumn(col 2:int, col 13:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 13:int) -> 17:int, LongColModuloLongColumn(col 3:bigint, col 13:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 13:bigint) -> 18:bigint, LongColModuloLongColumn(col 0:tinyint, col 0:tinyint) -> 13:tinyint + Statistics: Num rows: 4193 Data size: 217816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: double), _col4 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4193 Data size: 217816 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: tinyint), _col5 (type: double), _col6 (type: int), _col7 (type: bigint), _col8 (type: tinyint) + Execution mode: vectorized, llap + LLAP 
IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: bigint), VALUE._col6 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 4, 0, 1, 5, 6, 7, 8] + Statistics: Num rows: 4193 Data size: 217816 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 5216 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 5216 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select cint, cbigint, ctinyint, (cint / (cint - 528534767)) as c1, (cbigint / (cbigint - 1018195815)) as c2, (ctinyint / ctinyint) as c3, (cint % (cint - 528534767)) as c4, (cbigint % (cbigint - 1018195815)), (ctinyint % ctinyint) as c3 +from alltypesorc where cint > 500000000 or cdouble > 1000000000 or ctinyint = 0 order by c1, c2 limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select cint, cbigint, ctinyint, (cint / (cint - 528534767)) as c1, (cbigint / (cbigint - 1018195815)) as c2, (ctinyint / ctinyint) as c3, (cint % (cint - 528534767)) as c4, (cbigint % (cbigint - 1018195815)), (ctinyint % ctinyint) as c3 +from alltypesorc where cint > 500000000 or cdouble > 1000000000 or ctinyint = 0 order by c1, c2 limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +528534767 NULL -50 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 33 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 +528534767 NULL NULL NULL NULL NULL NULL NULL NULL +528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 16 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 62 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -48 NULL NULL 1.0 
NULL NULL 0 +528534767 NULL -62 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 39 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -32 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -7 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -24 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -44 NULL NULL 1.0 NULL NULL 0 +NULL 1018195815 0 NULL NULL NULL NULL NULL NULL +528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -57 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 46 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -54 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -19 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 19 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 18 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -36 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -40 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -59 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 0 NULL NULL NULL NULL NULL NULL +528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 27 NULL NULL 1.0 NULL NULL 0 +528534767 NULL NULL NULL NULL NULL NULL NULL NULL +528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 43 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 41 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 13 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 10 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 2 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -37 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -43 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -1 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -12 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 0 NULL NULL NULL NULL NULL NULL +528534767 NULL 26 NULL NULL 
1.0 NULL NULL 0 +528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 9 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -13 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -47 NULL NULL 1.0 NULL NULL 0 diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out new file mode 100644 index 0000000..c8959ef --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -0,0 +1,943 @@ +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cbigint (type: bigint), cdouble (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 7 + Processor Tree: + ListSink + +WARNING: Comparing a bigint and a double may result in a loss of precision. 
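
The vectorization_div0.q.out results above pin down Hive's divide-by-zero semantics under vectorized execution: every x / 0, x / 0.0, and x % 0 row comes out as SQL NULL rather than an error, Infinity, or NaN, and the long, double, and decimal code paths all agree. A minimal standalone sketch of that per-row convention, using plain Java arrays with a parallel null flag rather than Hive's ColumnVector classes (illustrative only, with made-up sample values; this is not the generated VectorExpression code):

    public class DivideByZeroToNull {
        public static void main(String[] args) {
            // Arbitrary sample values; any zero denominator must yield SQL NULL.
            double[] num = {10361.0, -13877.0, 6888911.5};
            double[] den = {0.0, -274.0, 0.0};
            double[] out = new double[num.length];
            boolean[] isNull = new boolean[num.length];

            for (int i = 0; i < num.length; i++) {
                if (den[i] == 0.0) {
                    isNull[i] = true;      // x / 0 -> NULL, never Infinity or NaN
                } else {
                    out[i] = num[i] / den[i];
                }
            }
            for (int i = 0; i < num.length; i++) {
                System.out.println(isNull[i] ? "NULL" : String.valueOf(out[i]));
            }
        }
    }

Printing NULL for flagged rows mirrors how the golden files render null results; the real vectorized expressions record nullness in the output ColumnVector's isNull array, which is exactly what these new expected outputs exercise.
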
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-1887561756 -10011.0 +-1887561756 -13877.0 +-1887561756 -2281.0 +-1887561756 -8881.0 +-1887561756 10361.0 +-1887561756 1839.0 +-1887561756 9531.0 +PREHOOK: query: explain vectorization detail +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:tinyint) + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5, 1] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 5] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: smallint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 5] + dataColumns: ctinyint:tinyint, 
csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-64 -10462.0 -10462 +-64 -15920.0 -15920 +-64 -1600.0 -1600 +-64 -200.0 -200 +-64 -2919.0 -2919 +-64 -3097.0 -3097 +-64 -3586.0 -3586 +-64 -4018.0 -4018 +-64 -4040.0 -4040 +-64 -4803.0 -4803 +-64 -6907.0 -6907 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -8080.0 -8080 +-64 -9842.0 -9842 +PREHOOK: query: explain vectorization detail +select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan 
Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 13] + selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: avg(_col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDouble(col 13:double) -> struct + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [1] + Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: struct) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [double] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:tinyint, VALUE._col0:struct + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 1:struct) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-46 3033.55 +-47 -574.6428571428571 +-48 1672.909090909091 +-49 768.7659574468086 +-50 -960.0192307692307 +-51 -96.46341463414635 +-52 2810.705882352941 +-53 -532.7567567567568 +-54 2712.7272727272725 +-55 2385.595744680851 +-56 2595.818181818182 +-57 1867.0535714285713 +-58 3483.2444444444445 +-59 318.27272727272725 +-60 1071.82 +-61 914.3404255319149 +-62 245.69387755102042 +-63 2178.7272727272725 +-64 373.52941176470586 +NULL 9370.0945309795 +PREHOOK: query: explain vectorization detail +select distinct(ctinyint) from alltypesorc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select distinct(ctinyint) from alltypesorc limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-46 +-47 +-48 +-49 +-50 +-51 +-52 +-53 +-54 +-55 +-56 +-57 +-58 +-59 +-60 +-61 +-62 +-63 +-64 +NULL +PREHOOK: query: explain vectorization detail +select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint), cdouble (type: double) + outputColumnNames: ctinyint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5] + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint, col 5:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint), cdouble (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [] + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:tinyint, KEY._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + 
keyExpressions: col 0:tinyint, col 1:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1:double) -> bigint + className: VectorGroupByOperator + groupByMode: COMPLETE + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] + keys: _col0 (type: tinyint) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-46 24 +-47 22 +-48 29 +-49 26 +-50 30 +-51 21 +-52 33 +-53 22 +-54 26 +-55 29 +-56 36 +-57 35 +-58 23 +-59 31 +-60 27 +-61 25 +-62 27 +-63 19 +-64 24 +NULL 2932 +PREHOOK: query: explain vectorization detail +select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +PREHOOK: query: explain vectorization detail +select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:tinyint) + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 5:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: cdouble (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:double, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: 
col 0:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint), _col0 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-10462.0 -64 +-1121.0 -89 +-11322.0 -101 +-11492.0 -78 +-15920.0 -64 +-4803.0 -64 +-6907.0 -64 +-7196.0 -2009 +-8080.0 -64 +-8118.0 -80 +-9842.0 -64 +10496.0 -67 +15601.0 -1733 +3520.0 -86 +4811.0 -115 +5241.0 -80 +557.0 -75 +7705.0 -88 +9452.0 -76 +NULL -32768 diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out index bca2d2a..f321770 100644 --- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out @@ -1,3 +1,126 @@ +PREHOOK: 
query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: abs(ctinyint) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: FuncAbsLongToLong(col 0:tinyint) -> 13:int + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 13:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + 
Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out new file mode 100644 index 0000000..2b5a21e --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out @@ -0,0 +1,684 @@ +PREHOOK: query: DROP TABLE IF EXISTS parquet_types_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS parquet_types_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: DROP TABLE IF EXISTS parquet_project_test +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS parquet_project_test +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_project_test( +cint int, +m1 map<string,string> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: CREATE TABLE parquet_project_test( +cint int, +m1 map<string,string> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@parquet_project_test +PREHOOK: query: insert into parquet_project_test +select ctinyint, map("color","red") from parquet_types_staging +where ctinyint = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: insert into parquet_project_test +select ctinyint, map("color","red") from parquet_types_staging +where ctinyint = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_project_test +POSTHOOK: Lineage: parquet_project_test.cint EXPRESSION [] +POSTHOOK: Lineage: parquet_project_test.m1 EXPRESSION [] +PREHOOK: query: insert into parquet_project_test +select ctinyint, map("color","green") from parquet_types_staging +where ctinyint = 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: insert into parquet_project_test +select ctinyint, map("color","green") from parquet_types_staging +where ctinyint = 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_project_test +POSTHOOK: Lineage: parquet_project_test.cint EXPRESSION [] +POSTHOOK: Lineage: parquet_project_test.m1 EXPRESSION [] +PREHOOK: query: insert into parquet_project_test +select ctinyint, map("color","blue") from parquet_types_staging +where ctinyint = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: insert into parquet_project_test +select ctinyint, map("color","blue") from parquet_types_staging +where ctinyint = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_project_test +POSTHOOK: Lineage: parquet_project_test.cint EXPRESSION [] +POSTHOOK: Lineage: parquet_project_test.m1 EXPRESSION [] +PREHOOK: query: explain vectorization select * from parquet_project_test +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select * from parquet_project_test +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_project_test + Statistics: Num rows: 22 Data size: 20328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), m1 (type: map<string,string>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 20328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 20328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type MAP not enabled (map<string,string>) since hive.vectorized.complex.types.enabled IS false + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: select * from parquet_project_test +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +POSTHOOK: query: select * from parquet_project_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +PREHOOK: query: explain vectorization select count(*) from parquet_project_test +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select count(*) from parquet_project_test +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_project_test + Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from parquet_project_test +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from parquet_project_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_project_test +#### A masked pattern was 
here #### +22 +PREHOOK: query: explain vectorization select cint, count(*) from parquet_project_test +group by cint +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select cint, count(*) from parquet_project_test +group by cint +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_project_test + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cint (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint, count(*) from parquet_project_test +group by cint +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +POSTHOOK: query: select cint, count(*) from parquet_project_test +group by cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +1 8 +2 7 +3 7 +PREHOOK: query: explain vectorization select m1["color"], count(*) from parquet_project_test +group by m1["color"] +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select m1["color"], count(*) from parquet_project_test +group by m1["color"] +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_project_test + Statistics: Num rows: 22 Data size: 20240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: m1['color'] (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 20240 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 20240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 22 Data size: 20240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type MAP not enabled (map<string,string>) since hive.vectorized.complex.types.enabled IS false + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 10120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 10120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select m1["color"], count(*) from parquet_project_test +group by m1["color"] +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +POSTHOOK: query: select m1["color"], count(*) from parquet_project_test +group by m1["color"] +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +blue 7 +green 7 +red 8 +PREHOOK: query: create table if not exists parquet_nullsplit(key string, val string) partitioned by (len string) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_nullsplit +POSTHOOK: query: create table if not exists parquet_nullsplit(key string, val string) partitioned by (len string) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_nullsplit +PREHOOK: query: insert into table parquet_nullsplit partition(len='1') +values ('one', 'red') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@parquet_nullsplit@len=1 +POSTHOOK: query: insert into table parquet_nullsplit partition(len='1') 
+values ('one', 'red') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@parquet_nullsplit@len=1 +POSTHOOK: Lineage: parquet_nullsplit PARTITION(len=1).key SCRIPT [] +POSTHOOK: Lineage: parquet_nullsplit PARTITION(len=1).val SCRIPT [] +PREHOOK: query: explain vectorization select count(*) from parquet_nullsplit where len = '1' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select count(*) from parquet_nullsplit where len = '1' +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_nullsplit + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from parquet_nullsplit where len = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_nullsplit +PREHOOK: Input: default@parquet_nullsplit@len=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from parquet_nullsplit where len = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_nullsplit +POSTHOOK: Input: default@parquet_nullsplit@len=1 +#### A masked pattern was here #### +1 +PREHOOK: query: explain vectorization select count(*) from parquet_nullsplit where len = '99' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select count(*) from parquet_nullsplit where len = '99' +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_nullsplit + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (len = '99') (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: unknown + Map Vectorization: + enabled: true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from parquet_nullsplit where len = '99' +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_nullsplit +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from parquet_nullsplit where len = '99' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_nullsplit +#### A masked pattern was here #### +0 +PREHOOK: query: drop table parquet_nullsplit +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_nullsplit +PREHOOK: Output: default@parquet_nullsplit +POSTHOOK: query: drop table parquet_nullsplit +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_nullsplit +POSTHOOK: Output: default@parquet_nullsplit +PREHOOK: query: drop table parquet_project_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_project_test +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: drop table parquet_project_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_project_test +POSTHOOK: Output: default@parquet_project_test +PREHOOK: query: drop table parquet_types_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: drop table parquet_types_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_types_staging diff --git 
ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out index e46c7f4..130e137 100644 --- ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out @@ -70,15 +70,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_part - Statistics: Num rows: 200 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: (cdouble + 2.0) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -103,13 +103,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index f56d9ce..ed17e5c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -54,6 +54,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -91,6 +92,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, 
ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -140,7 +147,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -158,7 +165,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -196,6 +203,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -233,6 +241,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -240,13 +254,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -275,6 +289,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -300,8 +315,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -315,14 +332,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + 
dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -367,14 +397,14 @@ from alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -5110 4607 -PREHOOK: query: explain vectorization expression +4086 3583 +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -403,6 +433,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -428,8 +459,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -443,14 +476,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true 
+ rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -514,10 +560,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -540,6 +586,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -570,6 +617,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -588,10 +641,10 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -614,6 +667,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -644,6 +698,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -662,10 +722,10 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -688,6 +748,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) 
outputColumnNames: _col0 @@ -718,6 +779,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -754,10 +821,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -780,6 +847,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -810,6 +878,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -828,10 +902,10 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -854,6 +928,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -884,6 +959,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -902,10 +983,10 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -928,6 +1009,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 
@@ -958,6 +1040,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -976,3 +1064,227 @@ POSTHOOK: Input: default@test_2 NULL NULL 4 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 248718130534 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out 
ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out new file mode 100644 index 0000000..673e607 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: create temporary table x (a int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: create temporary table x (a int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +PREHOOK: query: create temporary table y (b int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@y +POSTHOOK: query: create temporary table y (b int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@y +PREHOOK: query: insert into x values(1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@x +POSTHOOK: query: insert into x values(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@x +POSTHOOK: Lineage: x.a SCRIPT [] +PREHOOK: query: insert into y values(1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@y +POSTHOOK: query: insert into y values(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@y +POSTHOOK: Lineage: y.b SCRIPT [] +PREHOOK: query: explain vectorization expression +select count(1) from x, y where a = b +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(1) from x, y where a = b +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: 
VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: b is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: b (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from x, y where a = b +PREHOOK: type: QUERY +PREHOOK: Input: default@x +PREHOOK: Input: default@y +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from x, y where a = b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@x +POSTHOOK: Input: default@y +#### A masked pattern was here #### +1 diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out index bca2d2a..96ad3ad 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -1,3 +1,124 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: abs(ctinyint) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: FuncAbsLongToLong(col 0:tinyint) -> 13:int + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 13:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index c1dd74c..c084568 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -54,6 +54,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -90,6 +91,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, 
ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -139,7 +146,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -157,7 +164,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -195,6 +202,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -231,6 +239,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -238,13 +252,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -272,6 +286,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -297,8 +312,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized @@ -311,14 +328,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + 
dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -370,7 +400,7 @@ select sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -398,6 +428,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -423,8 +454,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized @@ -437,14 +470,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -508,10 +554,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table 
POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -534,6 +580,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -563,6 +610,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -581,10 +634,10 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -607,6 +660,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -636,6 +690,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -654,10 +714,10 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -680,6 +740,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -709,6 +770,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), 
decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -745,10 +812,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -771,6 +838,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -800,6 +868,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -818,10 +892,10 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -844,6 +918,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -873,6 +948,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -891,10 +972,10 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -917,6 +998,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 @@ -946,6 +1028,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -964,3 +1052,227 
@@ POSTHOOK: Input: default@test_2 NULL NULL 4 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was 
here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 248718130534 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 diff --git ql/src/test/results/clientpositive/vector_if_expr_2.q.out ql/src/test/results/clientpositive/vector_if_expr_2.q.out deleted file mode 100644 index fe4f77c..0000000 --- ql/src/test/results/clientpositive/vector_if_expr_2.q.out +++ /dev/null @@ -1,119 +0,0 @@ -PREHOOK: query: drop table if exists foo -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists foo -POSTHOOK: type: DROPTABLE -PREHOOK: 
query: create temporary table foo (x int, y int) stored as orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@foo -POSTHOOK: query: create temporary table foo (x int, y int) stored as orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@foo -PREHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@foo -POSTHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@foo -POSTHOOK: Lineage: foo.x SCRIPT [] -POSTHOOK: Lineage: foo.y SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -select x, IF(x > 0,y,0) from foo order by x -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -select x, IF(x > 0,y,0) from foo order by x -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: foo - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Select Operator - expressions: x (type: int), if((x > 0), y, 0) (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4] - selectExpressions: IfExprLongColumnLongScalar(col 3:boolean, col 1:int, val 0)(children: LongColGreaterLongScalar(col 0:int, val 0) -> 3:boolean) -> 4:int - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: 
query: select x, IF(x > 0,y,0) from foo order by x -PREHOOK: type: QUERY -PREHOOK: Input: default@foo -#### A masked pattern was here #### -POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x -POSTHOOK: type: QUERY -POSTHOOK: Input: default@foo -#### A masked pattern was here #### -1 1 -2 NULL -3 1 -PREHOOK: query: select x, IF(x > 0,y,0) from foo order by x -PREHOOK: type: QUERY -PREHOOK: Input: default@foo -#### A masked pattern was here #### -POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x -POSTHOOK: type: QUERY -POSTHOOK: Input: default@foo -#### A masked pattern was here #### -1 1 -2 NULL -3 1 diff --git ql/src/test/results/clientpositive/vector_like_2.q.out ql/src/test/results/clientpositive/vector_like_2.q.out deleted file mode 100644 index 26ff792..0000000 --- ql/src/test/results/clientpositive/vector_like_2.q.out +++ /dev/null @@ -1,125 +0,0 @@ -PREHOOK: query: drop table if exists foo -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists foo -POSTHOOK: type: DROPTABLE -PREHOOK: query: create temporary table foo (a string) stored as orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@foo -POSTHOOK: query: create temporary table foo (a string) stored as orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@foo -PREHOOK: query: insert into foo values("some foo"),("some bar"),(null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@foo -POSTHOOK: query: insert into foo values("some foo"),("some bar"),(null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@foo -POSTHOOK: Lineage: foo.a SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -select a, a like "%bar" from foo order by a -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -select a, a like "%bar" from foo order by a -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: foo - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:string, 1:ROW__ID:struct] - Select Operator - expressions: a (type: string), (a like '%bar') (type: boolean) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] - selectExpressions: SelectStringColLikeStringScalar(col 0:string) -> 2:boolean - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - 
inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: a:string - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a, a like "%bar" from foo order by a -PREHOOK: type: QUERY -PREHOOK: Input: default@foo -#### A masked pattern was here #### -POSTHOOK: query: select a, a like "%bar" from foo order by a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@foo -#### A masked pattern was here #### -NULL NULL -some bar true -some foo false -PREHOOK: query: select a, a like "%bar" from foo order by a -PREHOOK: type: QUERY -PREHOOK: Input: default@foo -#### A masked pattern was here #### -POSTHOOK: query: select a, a like "%bar" from foo order by a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@foo -#### A masked pattern was here #### -NULL NULL -some bar true -some foo false diff --git ql/src/test/results/clientpositive/vector_order_null.q.out ql/src/test/results/clientpositive/vector_order_null.q.out deleted file mode 100644 index c50e275..0000000 --- ql/src/test/results/clientpositive/vector_order_null.q.out +++ /dev/null @@ -1,1130 +0,0 @@ -PREHOOK: query: create table src_null (a int, b string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_null -POSTHOOK: query: create table src_null (a int, b string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_null -PREHOOK: query: insert into src_null values (1, 'A') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (1, 'A') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a SCRIPT [] -POSTHOOK: Lineage: src_null.b SCRIPT [] -col1 col2 -PREHOOK: query: insert into src_null values (null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a EXPRESSION [] -POSTHOOK: Lineage: src_null.b EXPRESSION [] -_col0 _col1 -PREHOOK: query: insert into src_null values (3, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (3, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: 
_dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a SCRIPT [] -POSTHOOK: Lineage: src_null.b EXPRESSION [] -_col0 _col1 -PREHOOK: query: insert into src_null values (2, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (2, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a SCRIPT [] -POSTHOOK: Lineage: src_null.b EXPRESSION [] -_col0 _col1 -PREHOOK: query: insert into src_null values (2, 'A') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (2, 'A') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a SCRIPT [] -POSTHOOK: Lineage: src_null.b SCRIPT [] -col1 col2 -PREHOOK: query: insert into src_null values (2, 'B') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (2, 'B') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a SCRIPT [] -POSTHOOK: Lineage: src_null.b SCRIPT [] -col1 col2 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a asc, b asc -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a asc, b asc -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc, b asc -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc, b asc -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -NULL NULL -1 A -2 NULL -2 A -2 B -3 NULL -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a desc, b asc -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a desc, b asc -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: -+ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc, b asc -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc, b asc -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -3 NULL -2 NULL -2 A -2 B -1 A -NULL NULL -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -2 NULL -3 NULL -NULL NULL -1 A -2 A -2 B -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b desc, a asc -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b desc, a asc -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: -+ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - 
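
Note on the expected orderings in this golden file: absent an explicit NULLS FIRST/NULLS LAST, Hive places NULL keys first for ascending sorts and last for descending sorts, which is exactly what these results pin down (the NULL row leads under "a asc", trails under "a desc", and moves within the b=NULL group under "a asc nulls last"). A minimal illustrative sketch against the src_null table created above — not part of the golden file itself:

  -- default: NULL keys sort first under ASC ...
  SELECT a, b FROM src_null ORDER BY a ASC;
  -- ... and last under DESC
  SELECT a, b FROM src_null ORDER BY a DESC;
  -- explicit override, as exercised by the "a asc nulls last" plan above
  SELECT a, b FROM src_null ORDER BY b ASC, a ASC NULLS LAST;
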
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -2 B -1 A -2 A -NULL NULL -2 NULL -3 NULL -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc -POSTHOOK: 
type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -NULL NULL -1 A -2 NULL -2 A -2 B -3 NULL -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: -+ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -NULL NULL -3 NULL -2 NULL -2 A -2 B -1 A -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b asc nulls last, 
a -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b asc nulls last, a -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -1 A -2 A -2 B -NULL NULL -2 NULL -3 NULL -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b desc nulls last, a -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b desc nulls last, a -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled 
IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: -+ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -2 B -1 A -2 A -NULL NULL -2 NULL -3 NULL -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE 
Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: +- - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -1 A -2 B -2 A -2 NULL -3 NULL -NULL NULL -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - 
outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: -- - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -2 B -2 A -1 A -3 NULL -2 NULL -NULL NULL -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int), b (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6 Data size: 22 Basic stats: 
COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: a:int, b:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -x.a x.b -2 NULL -3 NULL -NULL NULL -1 A -2 A -2 B diff --git ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out deleted file mode 100644 index fe68e5c..0000000 --- ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out +++ /dev/null @@ -1,2376 +0,0 @@ -PREHOOK: query: DROP TABLE IF EXISTS e011_01 -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS e011_01 -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE IF EXISTS e011_02 -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS e011_02 -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE IF EXISTS e011_03 -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS e011_03 -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE e011_01 ( - c1 decimal(15,2), - c2 decimal(15,2)) - STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@e011_01 -POSTHOOK: query: CREATE TABLE e011_01 ( - c1 decimal(15,2), - c2 decimal(15,2)) - STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@e011_01 -PREHOOK: query: CREATE TABLE e011_02 ( - c1 decimal(15,2), - c2 
decimal(15,2)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@e011_02 -POSTHOOK: query: CREATE TABLE e011_02 ( - c1 decimal(15,2), - c2 decimal(15,2)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@e011_02 -PREHOOK: query: CREATE TABLE e011_03 ( - c1 decimal(15,2), - c2 decimal(15,2)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@e011_03 -POSTHOOK: query: CREATE TABLE e011_03 ( - c1 decimal(15,2), - c2 decimal(15,2)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@e011_03 -PREHOOK: query: CREATE TABLE e011_01_small ( - c1 decimal(7,2), - c2 decimal(7,2)) - STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@e011_01_small -POSTHOOK: query: CREATE TABLE e011_01_small ( - c1 decimal(7,2), - c2 decimal(7,2)) - STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@e011_01_small -PREHOOK: query: CREATE TABLE e011_02_small ( - c1 decimal(7,2), - c2 decimal(7,2)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@e011_02_small -POSTHOOK: query: CREATE TABLE e011_02_small ( - c1 decimal(7,2), - c2 decimal(7,2)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@e011_02_small -PREHOOK: query: CREATE TABLE e011_03_small ( - c1 decimal(7,2), - c2 decimal(7,2)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@e011_03_small -POSTHOOK: query: CREATE TABLE e011_03_small ( - c1 decimal(7,2), - c2 decimal(7,2)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@e011_03_small -PREHOOK: query: LOAD DATA - LOCAL INPATH '../../data/files/e011_01.txt' - OVERWRITE - INTO TABLE e011_01 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@e011_01 -POSTHOOK: query: LOAD DATA - LOCAL INPATH '../../data/files/e011_01.txt' - OVERWRITE - INTO TABLE e011_01 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@e011_01 -PREHOOK: query: INSERT INTO TABLE e011_02 - SELECT c1, c2 - FROM e011_01 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -PREHOOK: Output: default@e011_02 -POSTHOOK: query: INSERT INTO TABLE e011_02 - SELECT c1, c2 - FROM e011_01 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01 -POSTHOOK: Output: default@e011_02 -POSTHOOK: Lineage: e011_02.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ] -POSTHOOK: Lineage: e011_02.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ] -c1 c2 -PREHOOK: query: INSERT INTO TABLE e011_03 - SELECT c1, c2 - FROM e011_01 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -PREHOOK: Output: default@e011_03 -POSTHOOK: query: INSERT INTO TABLE e011_03 - SELECT c1, c2 - FROM e011_01 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01 -POSTHOOK: Output: default@e011_03 -POSTHOOK: Lineage: e011_03.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ] -POSTHOOK: Lineage: e011_03.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ] -c1 c2 -PREHOOK: query: LOAD DATA - LOCAL INPATH '../../data/files/e011_01.txt' - OVERWRITE - INTO TABLE e011_01_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: 
default@e011_01_small -POSTHOOK: query: LOAD DATA - LOCAL INPATH '../../data/files/e011_01.txt' - OVERWRITE - INTO TABLE e011_01_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@e011_01_small -PREHOOK: query: INSERT INTO TABLE e011_02_small - SELECT c1, c2 - FROM e011_01_small -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01_small -PREHOOK: Output: default@e011_02_small -POSTHOOK: query: INSERT INTO TABLE e011_02_small - SELECT c1, c2 - FROM e011_01_small -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01_small -POSTHOOK: Output: default@e011_02_small -POSTHOOK: Lineage: e011_02_small.c1 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c1, type:decimal(7,2), comment:null), ] -POSTHOOK: Lineage: e011_02_small.c2 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c2, type:decimal(7,2), comment:null), ] -c1 c2 -PREHOOK: query: INSERT INTO TABLE e011_03_small - SELECT c1, c2 - FROM e011_01_small -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01_small -PREHOOK: Output: default@e011_03_small -POSTHOOK: query: INSERT INTO TABLE e011_03_small - SELECT c1, c2 - FROM e011_01_small -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01_small -POSTHOOK: Output: default@e011_03_small -POSTHOOK: Lineage: e011_03_small.c1 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c1, type:decimal(7,2), comment:null), ] -POSTHOOK: Lineage: e011_03_small.c2 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c2, type:decimal(7,2), comment:null), ] -c1 c2 -PREHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -PREHOOK: Output: default@e011_01 -#### A masked pattern was here #### -POSTHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01 -POSTHOOK: Output: default@e011_01 -#### A masked pattern was here #### -_c0 _c1 -PREHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_02 -PREHOOK: Output: default@e011_02 -#### A masked pattern was here #### -POSTHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_02 -POSTHOOK: Output: default@e011_02 -#### A masked pattern was here #### -_c0 _c1 -PREHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_03 -PREHOOK: Output: default@e011_03 -#### A masked pattern was here #### -POSTHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_03 -POSTHOOK: Output: default@e011_03 -#### A masked pattern was here #### -_c0 _c1 -PREHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01_small -PREHOOK: Output: default@e011_01_small -#### A masked pattern was here #### -POSTHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01_small -POSTHOOK: Output: default@e011_01_small -#### A masked pattern was here #### -_c0 _c1 -PREHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_02_small -PREHOOK: Output: default@e011_02_small -#### A masked pattern was here #### -POSTHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_02_small -POSTHOOK: 
Output: default@e011_02_small -#### A masked pattern was here #### -_c0 _c1 -PREHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_03_small -PREHOOK: Output: default@e011_03_small -#### A masked pattern was here #### -POSTHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_03_small -POSTHOOK: Output: default@e011_03_small -#### A masked pattern was here #### -_c0 _c1 -PREHOOK: query: explain vectorization detail -select sum(sum(c1)) over() from e011_01 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(sum(c1)) over() from e011_01 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_01 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:c1:decimal(15,2)/DECIMAL_64, 1:c2:decimal(15,2)/DECIMAL_64, 2:ROW__ID:struct] - Select Operator - expressions: c1 (type: decimal(15,2)) - outputColumnNames: c1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(c1) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> decimal(25,2) - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: c1:decimal(15,2)/DECIMAL_64, c2:decimal(15,2)/DECIMAL_64 - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(25,2)] - Reduce Output Operator - key expressions: 0 (type: int) - sort order: + - Map-reduce partition columns: 0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: decimal(25,2)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(25,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: 0 ASC NULLS FIRST - partition by: 0 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col0 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(35,2)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(c1)) over() from e011_01 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(c1)) over() from e011_01 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01 -#### A masked pattern was here #### -_c0 -16.00 -PREHOOK: query: explain vectorization detail -select sum(sum(c1)) over( - partition by c2 order by c1) - from e011_01 - group by e011_01.c1, e011_01.c2 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select 
sum(sum(c1)) over( - partition by c2 order by c1) - from e011_01 - group by e011_01.c1, e011_01.c2 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_01 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:c1:decimal(15,2)/DECIMAL_64, 1:c2:decimal(15,2)/DECIMAL_64, 2:ROW__ID:struct] - Select Operator - expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) - outputColumnNames: c1, c2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(c1) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> decimal(25,2) - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> 3:decimal(15,2), ConvertDecimal64ToDecimal(col 1:decimal(15,2)/DECIMAL_64) -> 4:decimal(15,2) - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: c1:decimal(15,2)/DECIMAL_64, c2:decimal(15,2)/DECIMAL_64 - partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(15,2), decimal(15,2)] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)] - Reduce Output Operator - key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col1 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(35,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(c1)) over( - partition by c2 order by c1) - from e011_01 - group by e011_01.c1, e011_01.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(c1)) over( - partition by c2 order by c1) - from e011_01 - group by e011_01.c1, e011_01.c2 
-POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01 -#### A masked pattern was here #### -_c0 -1.00 -3.00 -5.00 -7.00 -PREHOOK: query: explain vectorization detail -select sum(sum(e011_01.c1)) over( - partition by e011_01.c2 order by e011_01.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_01.c1, e011_01.c2 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(sum(e011_01.c1)) over( - partition by e011_01.c2 order by e011_01.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_01.c1, e011_01.c2 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_01 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(15,2)) - TableScan - alias: e011_03 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(15,2)) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Map Vectorization: - enabled: false - enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: decimal(15,2)) - 1 _col0 (type: decimal(15,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: 
[0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)] - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)] - Reduce Output Operator - key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col1 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(35,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(e011_01.c1)) over( - partition by e011_01.c2 order by e011_01.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_01.c1, e011_01.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -PREHOOK: Input: default@e011_03 -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(e011_01.c1)) over( - partition by e011_01.c2 order by e011_01.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_01.c1, e011_01.c2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01 -POSTHOOK: Input: default@e011_03 -#### A masked pattern was here #### -_c0 -1.00 -3.00 -5.00 -7.00 -PREHOOK: query: explain vectorization detail -select sum(sum(e011_01.c1)) over( - partition by e011_03.c2 order by e011_03.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c1, e011_03.c2 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(sum(e011_01.c1)) over( - partition by e011_03.c2 order by e011_03.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c1, e011_03.c2 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_03 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - 
Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(15,2)) - TableScan - alias: e011_01 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(15,2)) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Vectorization: - enabled: false - enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: decimal(15,2)) - 1 _col0 (type: decimal(15,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)] - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)] - Reduce Output Operator - key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col1 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(35,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(e011_01.c1)) over( - partition by e011_03.c2 order by e011_03.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c1, e011_03.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -PREHOOK: Input: default@e011_03 -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(e011_01.c1)) over( - partition by e011_03.c2 order by e011_03.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c1, e011_03.c2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01 -POSTHOOK: Input: default@e011_03 -#### A masked pattern was here #### -_c0 -1.00 -3.00 -5.00 -7.00 -PREHOOK: query: explain vectorization detail -select sum(corr(e011_01.c1, e011_03.c1)) - over(partition by e011_01.c2 order by e011_03.c2) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c2, e011_01.c2 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(corr(e011_01.c1, e011_03.c1)) - over(partition by e011_01.c2 order by e011_03.c2) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c2, e011_01.c2 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_01 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(15,2)) - TableScan - alias: e011_03 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(15,2)) - Map Vectorization: - enabled: false - enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Join Operator 
- condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: decimal(15,2)) - 1 _col0 (type: decimal(15,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: corr(_col0, _col2) - keys: _col1 (type: decimal(15,2)), _col3 (type: decimal(15,2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:struct] - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:struct - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: corr(VALUE._col0) - keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumDouble - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: double) - outputColumnNames: _col0 - 
Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(corr(e011_01.c1, e011_03.c1)) - over(partition by e011_01.c2 order by e011_03.c2) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c2, e011_01.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -PREHOOK: Input: default@e011_03 -#### A masked pattern was here #### -POSTHOOK: query: select sum(corr(e011_01.c1, e011_03.c1)) - over(partition by e011_01.c2 order by e011_03.c2) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c2, e011_01.c2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01 -POSTHOOK: Input: default@e011_03 -#### A masked pattern was here #### -sum_window_0 -NULL -NULL -NULL -NULL -PREHOOK: query: explain vectorization detail -select sum(sum(c1)) over() from e011_01_small -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(sum(c1)) over() from e011_01_small -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_01_small - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:c1:decimal(7,2)/DECIMAL_64, 1:c2:decimal(7,2)/DECIMAL_64, 2:ROW__ID:struct] - Select Operator - expressions: c1 (type: decimal(7,2)) - outputColumnNames: c1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(c1) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal64(col 0:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(17,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: 
true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: c1:decimal(7,2)/DECIMAL_64, c2:decimal(7,2)/DECIMAL_64 - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(17,2)] - Reduce Output Operator - key expressions: 0 (type: int) - sort order: + - Map-reduce partition columns: 0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(17,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:decimal(17,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: decimal(17,2)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: 0 ASC NULLS FIRST - partition by: 0 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col0 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(27,2)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(c1)) over() from e011_01_small -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01_small -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(c1)) over() from e011_01_small -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01_small -#### A masked pattern was here #### -_c0 -16.00 -PREHOOK: query: explain vectorization detail -select sum(sum(c1)) over( - partition by c2 order by c1) - from e011_01_small - group by e011_01_small.c1, e011_01_small.c2 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(sum(c1)) over( - partition by c2 order by c1) - from e011_01_small - group by e011_01_small.c1, e011_01_small.c2 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_01_small - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:c1:decimal(7,2)/DECIMAL_64, 1:c2:decimal(7,2)/DECIMAL_64, 2:ROW__ID:struct] - Select Operator - expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) - outputColumnNames: c1, c2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(c1) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal64(col 0:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(7,2)/DECIMAL_64) -> 3:decimal(7,2), ConvertDecimal64ToDecimal(col 1:decimal(7,2)/DECIMAL_64) -> 4:decimal(7,2) - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: c1:decimal(7,2)/DECIMAL_64, 
c2:decimal(7,2)/DECIMAL_64 - partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(7,2), decimal(7,2)] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)] - Reduce Output Operator - key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)) - sort order: ++ - Map-reduce partition columns: _col1 (type: decimal(7,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(27,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: 
false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(c1)) over( - partition by c2 order by c1) - from e011_01_small - group by e011_01_small.c1, e011_01_small.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01_small -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(c1)) over( - partition by c2 order by c1) - from e011_01_small - group by e011_01_small.c1, e011_01_small.c2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01_small -#### A masked pattern was here #### -_c0 -1.00 -3.00 -5.00 -7.00 -PREHOOK: query: explain vectorization detail -select sum(sum(e011_01_small.c1)) over( - partition by e011_01_small.c2 order by e011_01_small.c1) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_01_small.c1, e011_01_small.c2 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(sum(e011_01_small.c1)) over( - partition by e011_01_small.c2 order by e011_01_small.c1) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_01_small.c1, e011_01_small.c2 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_01_small - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)) - TableScan - alias: e011_03_small - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(7,2)) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Map Vectorization: - enabled: false - enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Join 
Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: decimal(7,2)) - 1 _col0 (type: decimal(7,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - keys: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)] - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)] - Reduce Output Operator - key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)) - sort order: ++ - Map-reduce partition columns: _col1 (type: decimal(7,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(27,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(e011_01_small.c1)) over( - partition by e011_01_small.c2 order by e011_01_small.c1) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_01_small.c1, e011_01_small.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01_small -PREHOOK: Input: default@e011_03_small -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(e011_01_small.c1)) over( - partition by e011_01_small.c2 order by e011_01_small.c1) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_01_small.c1, e011_01_small.c2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01_small -POSTHOOK: Input: default@e011_03_small -#### A masked pattern was here #### -_c0 -1.00 -3.00 -5.00 -7.00 -PREHOOK: query: explain vectorization detail -select sum(sum(e011_01_small.c1)) over( - partition by e011_03_small.c2 order by e011_03_small.c1) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_03_small.c1, e011_03_small.c2 -PREHOOK: type: QUERY -POSTHOOK: query: 
explain vectorization detail -select sum(sum(e011_01_small.c1)) over( - partition by e011_03_small.c2 order by e011_03_small.c1) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_03_small.c1, e011_03_small.c2 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_03_small - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)) - TableScan - alias: e011_01_small - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(7,2)) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Vectorization: - enabled: false - enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: decimal(7,2)) - 1 _col0 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)] - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) - Reduce Sink Vectorization: - className: 
VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)] - Reduce Output Operator - key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)) - sort order: ++ - Map-reduce partition columns: _col1 (type: decimal(7,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num 
rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(27,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(e011_01_small.c1)) over( - partition by e011_03_small.c2 order by e011_03_small.c1) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_03_small.c1, e011_03_small.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01_small -PREHOOK: Input: default@e011_03_small -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(e011_01_small.c1)) over( - partition by e011_03_small.c2 order by e011_03_small.c1) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_03_small.c1, e011_03_small.c2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01_small -POSTHOOK: Input: default@e011_03_small -#### A masked pattern was here #### -_c0 -1.00 -3.00 -5.00 -7.00 -PREHOOK: query: explain vectorization detail -select sum(corr(e011_01_small.c1, e011_03_small.c1)) - over(partition by e011_01_small.c2 order by e011_03_small.c2) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_03_small.c2, e011_01_small.c2 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(corr(e011_01_small.c1, e011_03_small.c1)) - over(partition by e011_01_small.c2 order by e011_03_small.c2) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_03_small.c2, e011_01_small.c2 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_01_small - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)) - sort order: + - Map-reduce partition 
columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)) - TableScan - alias: e011_03_small - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)) - Map Vectorization: - enabled: false - enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: decimal(7,2)) - 1 _col0 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: corr(_col0, _col2) - keys: _col1 (type: decimal(7,2)), _col3 (type: decimal(7,2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:struct] - Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(7,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:struct - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr 
IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: corr(VALUE._col0) - keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumDouble - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(corr(e011_01_small.c1, e011_03_small.c1)) - over(partition by e011_01_small.c2 order by e011_03_small.c2) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_03_small.c2, e011_01_small.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01_small -PREHOOK: Input: default@e011_03_small -#### A masked pattern was here #### -POSTHOOK: query: select sum(corr(e011_01_small.c1, e011_03_small.c1)) - over(partition by e011_01_small.c2 order by e011_03_small.c2) - from e011_01_small - join e011_03_small on e011_01_small.c1 = e011_03_small.c1 - group by e011_03_small.c2, e011_01_small.c2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01_small -POSTHOOK: Input: default@e011_03_small -#### A masked pattern was here #### -sum_window_0 -NULL -NULL -NULL -NULL diff --git ql/src/test/results/clientpositive/vector_udf2.q.out ql/src/test/results/clientpositive/vector_udf2.q.out deleted file mode 100644 index 222a901..0000000 --- ql/src/test/results/clientpositive/vector_udf2.q.out +++ /dev/null @@ -1,188 +0,0 @@ -PREHOOK: query: drop table varchar_udf_2 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table varchar_udf_2 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@varchar_udf_2 -POSTHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@varchar_udf_2 -PREHOOK: query: insert overwrite table varchar_udf_2 - select key, value, key, value from src where key = '238' limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: 
default@src -PREHOOK: Output: default@varchar_udf_2 -POSTHOOK: query: insert overwrite table varchar_udf_2 - select key, value, key, value from src where key = '238' limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@varchar_udf_2 -POSTHOOK: Lineage: varchar_udf_2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: varchar_udf_2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: varchar_udf_2.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: varchar_udf_2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain vectorization expression -select - c1 LIKE '%38%', - c2 LIKE 'val_%', - c3 LIKE '%38', - c1 LIKE '%3x8%', - c2 LIKE 'xval_%', - c3 LIKE '%x38' -from varchar_udf_2 limit 1 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression -select - c1 LIKE '%38%', - c2 LIKE 'val_%', - c3 LIKE '%38', - c1 LIKE '%3x8%', - c2 LIKE 'xval_%', - c3 LIKE '%x38' -from varchar_udf_2 limit 1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: varchar_udf_2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Select Operator - expressions: (c1 like '%38%') (type: boolean), (c2 like 'val_%') (type: boolean), (c3 like '%38') (type: boolean), (c1 like '%3x8%') (type: boolean), (c2 like 'xval_%') (type: boolean), (c3 like '%x38') (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10] - selectExpressions: SelectStringColLikeStringScalar(col 0:string) -> 5:boolean, SelectStringColLikeStringScalar(col 1:string) -> 6:boolean, SelectStringColLikeStringScalar(col 2:varchar(10)) -> 7:boolean, SelectStringColLikeStringScalar(col 0:string) -> 8:boolean, SelectStringColLikeStringScalar(col 1:string) -> 9:boolean, SelectStringColLikeStringScalar(col 2:varchar(10)) -> 10:boolean - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - ListSink - -PREHOOK: query: select - c1 LIKE '%38%', - c2 LIKE 'val_%', - c3 LIKE '%38', - 
c1 LIKE '%3x8%', - c2 LIKE 'xval_%', - c3 LIKE '%x38' -from varchar_udf_2 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_2 -#### A masked pattern was here #### -POSTHOOK: query: select - c1 LIKE '%38%', - c2 LIKE 'val_%', - c3 LIKE '%38', - c1 LIKE '%3x8%', - c2 LIKE 'xval_%', - c3 LIKE '%x38' -from varchar_udf_2 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_2 -#### A masked pattern was here #### -true true true false false false -PREHOOK: query: drop table varchar_udf_2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@varchar_udf_2 -PREHOOK: Output: default@varchar_udf_2 -POSTHOOK: query: drop table varchar_udf_2 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@varchar_udf_2 -POSTHOOK: Output: default@varchar_udf_2 -PREHOOK: query: create temporary table HIVE_14349 (a string) stored as orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@HIVE_14349 -POSTHOOK: query: create temporary table HIVE_14349 (a string) stored as orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@HIVE_14349 -PREHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@hive_14349 -POSTHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@hive_14349 -POSTHOOK: Lineage: hive_14349.a SCRIPT [] -PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%' -PREHOOK: type: QUERY -PREHOOK: Input: default@hive_14349 -#### A masked pattern was here #### -POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hive_14349 -#### A masked pattern was here #### -XYZa -PREHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@hive_14349 -POSTHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@hive_14349 -POSTHOOK: Lineage: hive_14349.a SCRIPT [] -PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_' -PREHOOK: type: QUERY -PREHOOK: Input: default@hive_14349 -#### A masked pattern was here #### -POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hive_14349 -#### A masked pattern was here #### -XYZab -PREHOOK: query: drop table HIVE_14349 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@hive_14349 -PREHOOK: Output: default@hive_14349 -POSTHOOK: query: drop table HIVE_14349 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@hive_14349 -POSTHOOK: Output: default@hive_14349 diff --git ql/src/test/results/clientpositive/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/vectorization_nested_udf.q.out index bca2d2a..090c6c6 100644 --- ql/src/test/results/clientpositive/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/vectorization_nested_udf.q.out @@ -1,3 +1,97 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: abs(ctinyint) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: FuncAbsLongToLong(col 0:tinyint) -> 13:int + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 13:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git 
ql/src/test/results/clientpositive/vectorization_offset_limit.q.out ql/src/test/results/clientpositive/vectorization_offset_limit.q.out deleted file mode 100644 index 0bdbd97..0000000 --- ql/src/test/results/clientpositive/vectorization_offset_limit.q.out +++ /dev/null @@ -1,163 +0,0 @@ -WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint), cdouble (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 2 - Offset of rows: 3 - Statistics: Num rows: 2 Data size: 430 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 430 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - - Stage: Stage-0 - Fetch Operator - limit: 2 - Processor Tree: - ListSink - -WARNING: Comparing a bigint and a double may result in a loss of precision. 
-PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### --1887561756 10361.0 --1887561756 -8881.0 -PREHOOK: query: explain vectorization expression -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:tinyint) - predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 1] - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: double) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: smallint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 3 - Offset of rows: 10 - Statistics: Num rows: 3 Data size: 645 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 645 
Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 3 - Processor Tree: - ListSink - -PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### --64 -7196.0 -7196 --64 -6907.0 -6907 --64 -4803.0 -4803 diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 50e9b0e..9bfb200 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -51,6 +51,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -87,6 +88,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -136,7 +143,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -154,7 +161,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -189,6 +196,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -225,6 +233,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + 
rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -232,13 +246,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -261,6 +275,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -301,6 +316,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -340,13 +361,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 5110 4607 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -369,6 +390,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -409,6 +431,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + 
includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -466,10 +494,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -489,6 +517,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -518,6 +547,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -536,10 +571,10 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -559,6 +594,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -588,6 +624,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -606,10 +648,10 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -629,6 +671,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + 
vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -658,6 +701,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -694,10 +743,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -717,6 +766,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -746,6 +796,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -764,10 +820,10 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -787,6 +843,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -816,6 +873,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -834,10 +897,10 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -857,6 +920,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + 
vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 @@ -886,6 +950,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -904,3 +974,227 @@ POSTHOOK: Input: default@test_2 NULL NULL 4 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: 
Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 248718130534 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where 
cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out deleted file mode 100644 index 7c4e0ed..0000000 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ /dev/null @@ -1,227 +0,0 @@ -PREHOOK: query: create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dtest -POSTHOOK: query: create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dtest -PREHOOK: query: insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view explode(a) t1 as c -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dtest -POSTHOOK: query: insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view explode(a) t1 as c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dtest -POSTHOOK: Lineage: dtest.a SCRIPT [] -POSTHOOK: Lineage: dtest.b SIMPLE [] -PREHOOK: query: explain vectorization detail -select sum(distinct a), count(distinct a) from dtest -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(distinct a), count(distinct a) from dtest -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: dtest - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct] - Select Operator - expressions: a (type: int) - outputColumnNames: a - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT a), count(DISTINCT a) - bucketGroup: true - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] - keys: a (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: a:int, b:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(distinct a), count(distinct a) from dtest -PREHOOK: type: QUERY -PREHOOK: Input: default@dtest -#### A masked pattern was here #### -POSTHOOK: query: select sum(distinct a), count(distinct a) from dtest -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dtest -#### A masked pattern was here #### -300 1 -PREHOOK: query: explain vectorization detail -select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] - Select Operator - expressions: cint (type: int) - outputColumnNames: cint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2] - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT cint), count(DISTINCT cint), avg(DISTINCT cint), std(DISTINCT cint) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: std - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 2:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3] - keys: cint (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 12288 Data size: 
2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [2] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), avg(DISTINCT KEY._col0:2._col0), std(DISTINCT KEY._col0:3._col0) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### --3482841611 6082 -572647.4204209142 6.153814687328991E8 diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index b743e64..3740fb6 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + /** * This class supports string and binary data by value reference -- i.e. each field is @@ -93,7 +95,12 @@ public void reset() { initBuffer(0); } - /** Set a field by reference. + /** + * Set a field by reference. + * + * This is a FAST version that assumes the caller has checked to make sure the sourceBuf + * is not null and elementNum is correctly adjusted for isRepeating. 
And, that the isNull entry + * has been set. Only the output entry fields will be set by this method. * * @param elementNum index within column vector to set * @param sourceBuf container of source data @@ -161,6 +168,10 @@ public int bufferSize() { * DO NOT USE this method unless it's not practical to set data by reference with setRef(). * Setting data by reference tends to run a lot faster than copying data in. * + * This is a FAST version that assumes the caller has checked to make sure the sourceBuf + * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. Only the output entry fields will be set by this method. + * * @param elementNum index within column vector to set * @param sourceBuf container of source data * @param start start byte position within source @@ -183,6 +194,10 @@ public void setVal(int elementNum, byte[] sourceBuf, int start, int length) { * DO NOT USE this method unless it's not practical to set data by reference with setRef(). * Setting data by reference tends to run a lot faster than copying data in. * + * This is a FAST version that assumes the caller has checked to make sure the sourceBuf + * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. Only the output entry fields will be set by this method. + * * @param elementNum index within column vector to set * @param sourceBuf container of source data */ @@ -309,46 +324,90 @@ public void increaseBufferSpace(int nextElemLength) { /** Copy the current object contents into the output. Only copy selected entries, * as indicated by selectedInUse and the sel array. */ + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, BytesColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + BytesColumnVector output = (BytesColumnVector) outputColVector; + + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.setVal(0, vector[0], start[0], length[0]); - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.setVal(0, vector[0], start[0], length[0]); + } else { + output.isNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.setVal(i, vector[i], start[i], length[i]); - } - } - else { - for (int i = 0; i < size; i++) { - output.setVal(i, vector[i], start[i], length[i]); + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.setVal(i, vector[i], start[i], length[i]); + } + } else { + Arrays.fill(output.isNull, 0, size, false); + for(int i = 0; i < size; ++i) { + output.setVal(i, vector[i], start[i], length[i]); + } + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.setVal(i, vector[i], start[i], length[i]); + } + } else { + for(int i = 0; i < size; ++i) { + output.setVal(i, vector[i], start[i], length[i]); + } + } } - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; - output.isNull[i] = isNull[i]; + if (!isNull[i]) { + output.isNull[i] = false; + output.setVal(i, vector[i], start[i], length[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } else { + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + output.isNull[i] = false; + output.setVal(i, vector[i], start[i], length[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } } - } - else { - System.arraycopy(isNull, 0, output.isNull, 0, size); } } } @@ -390,9 +449,9 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { // Fill the all the vector entries with provided value public void fill(byte[] value) { - noNulls = true; isRepeating = true; - setRef(0, value, 0, value.length); + isNull[0] = false; + setVal(0, value, 0, value.length); } // Fill the column vector with nulls @@ -403,18 +462,53 @@ public void fillWithNulls() { isNull[0] = true; } + /** + * Set the element in this column vector from the given input vector. + * + * Both the inputElementNum and outElementNum must have been adjusted to 0 in ADVANCE + * when the input / output has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - if (inputVector.isRepeating) { - inputElementNum = 0; + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. 
+ if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating && inputElementNum != 0) { + throw new RuntimeException("Input column number expected to be 0 when isRepeating"); } - if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { - isNull[outElementNum] = false; - BytesColumnVector in = (BytesColumnVector) inputVector; - setVal(outElementNum, in.vector[inputElementNum], + + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + BytesColumnVector in = (BytesColumnVector) inputColVector; + setVal(outputElementNum, in.vector[inputElementNum], in.start[inputElementNum], in.length[inputElementNum]); } else { - isNull[outElementNum] = true; + + // Only mark output NULL when input is NULL. + isNull[outputElementNum] = true; noNulls = false; } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index bce0bd7..5ca72b3 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -176,10 +176,36 @@ protected void flattenPush() { /** * Set the element in this column vector from the given input vector. - * This method can assume that the output does not have isRepeating set. + * + * Both the inputElementNum and outElementNum must have been adjusted to 0 in ADVANCE + * when the input / output has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * */ - public abstract void setElement(int outElementNum, int inputElementNum, - ColumnVector inputVector); + public abstract void setElement(int outputElementNum, int inputElementNum, + ColumnVector inputColVector); + + /* + * Copy the current object contents into the output. Only copy selected entries + * as indicated by selectedInUse and the sel array. + */ + public abstract void copySelected( + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector); /** * Initialize the column vector. This method can be overridden by specific column vector types. 
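To make the revised contract concrete before the per-type implementations below, here is a minimal caller sketch in Java. It is not part of this patch: the class name, method name, and forceNull parameter are hypothetical, while the ColumnVector fields (isNull, noNulls, isRepeating) and the setElement() call are the storage-api surface defined above. The caller decides output NULLs in advance and then copies values, relying on setElement() being a no-op for entries already marked NULL:

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

public class SetElementContractSketch {

  /**
   * Copy the first n entries of input into output, forcing the entries flagged
   * in forceNull to NULL. Assumes output.isRepeating == false and that the
   * output column was reset for this batch (so no stale isNull entries remain).
   */
  public static void copyWithPresetNulls(
      ColumnVector input, ColumnVector output, boolean[] forceNull, int n) {

    // Step 1: mark output NULL entries in ADVANCE, per the setElement() contract.
    for (int i = 0; i < n; i++) {
      if (forceNull[i]) {
        output.isNull[i] = true;
        output.noNulls = false;
      }
    }

    // Step 2: copy values. setElement() does NOTHING for the entries marked NULL
    // in step 1, so those decisions cannot be overwritten here. The input index
    // is adjusted to 0 when the input is repeating, as the invariants require.
    for (int i = 0; i < n; i++) {
      output.setElement(i, input.isRepeating ? 0 : i, input);
    }
  }
}

The copySelected() implementations added below for BytesColumnVector, DecimalColumnVector, and DoubleColumnVector follow the same discipline: output isNull entries are maintained carefully, and noNulls is only ever cleared, instead of being blindly copied from the input as the old code did with output.noNulls = noNulls.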
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java
index 37b0bf5..1a3d592 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java
@@ -41,6 +41,19 @@ public Decimal64ColumnVector(int size, int precision, int scale) {
     tempHiveDecWritable = new HiveDecimalWritable();
   }
 
+  /**
+   * Set a Decimal64 field from a HiveDecimalWritable.
+   *
+   * This is a FAST version that assumes the caller has checked to make sure the writable
+   * is not null and elementNum is correctly adjusted for isRepeating.  And, that the isNull entry
+   * has been set.
+   *
+   * We will check for precision/scale range, so the entry's NULL may get set.
+   * Otherwise, only the output entry fields will be set by this method.
+   *
+   * @param elementNum
+   * @param writable
+   */
   public void set(int elementNum, HiveDecimalWritable writable) {
     tempHiveDecWritable.set(writable);
     tempHiveDecWritable.mutateEnforcePrecisionScale(precision, scale);
@@ -48,11 +61,23 @@ public void set(int elementNum, HiveDecimalWritable writable) {
       noNulls = false;
       isNull[elementNum] = true;
     } else {
-      isNull[elementNum] = false;
       vector[elementNum] = tempHiveDecWritable.serialize64(scale);
     }
   }
 
+  /**
+   * Set a Decimal64 field from a HiveDecimal.
+   *
+   * This is a FAST version that assumes the caller has checked to make sure the hiveDec
+   * is not null and elementNum is correctly adjusted for isRepeating.  And, that the isNull entry
+   * has been set.
+   *
+   * We will check for precision/scale range, so the entry's NULL may get set.
+   * Otherwise, only the output entry fields will be set by this method.
+   *
+   * @param elementNum
+   * @param hiveDec
+   */
   public void set(int elementNum, HiveDecimal hiveDec) {
     tempHiveDecWritable.set(hiveDec);
     tempHiveDecWritable.mutateEnforcePrecisionScale(precision, scale);
@@ -60,7 +85,6 @@ public void set(int elementNum, HiveDecimal hiveDec) {
       noNulls = false;
       isNull[elementNum] = true;
     } else {
-      isNull[elementNum] = false;
       vector[elementNum] = tempHiveDecWritable.serialize64(scale);
     }
   }
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index e41e19f..d074254 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -19,6 +19,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 
@@ -51,37 +53,72 @@ public DecimalColumnVector(int size, int precision, int scale) {
 
   // Fill the all the vector entries with provided value
   public void fill(HiveDecimal value) {
-    noNulls = true;
     isRepeating = true;
+    isNull[0] = false;
     if (vector[0] == null) {
       vector[0] = new HiveDecimalWritable(value);
-    } else {
-      vector[0].set(value);
     }
+    set(0, value);
   }
 
   @Override
   public void flatten(boolean selectedInUse, int[] sel, int size) {
-    // TODO Auto-generated method stub
+    throw new RuntimeException("Not implemented");
   }
 
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * Both the inputElementNum and outElementNum must have been adjusted to 0 in ADVANCE
+   * when the input / output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING.  This
+   * supports the caller to do output NULL processing in advance that may cause the output results
+   * operation to be ignored.  Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls and isNull entry will be examined.  The output will only
+   * be set if the input is NOT NULL.  I.e. noNulls || !isNull[inputElementNum] where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL.
+   * I.e. set output noNull = false and isNull[outElementNum] = true.  An example of out-of-range
+   * is the DecimalColumnVector which can find the input decimal does not fit in the output
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, we have no need and do not mark
+   * the output entry to NOT NULL).
+   *
+   */
   @Override
-  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
-    if (inputVector.isRepeating) {
-      inputElementNum = 0;
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+    }
+    if (inputColVector.isRepeating && inputElementNum != 0) {
+      throw new RuntimeException("Input column number expected to be 0 when isRepeating");
+    }
+
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
     }
-    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
-      vector[outElementNum].set(
-          ((DecimalColumnVector) inputVector).vector[inputElementNum],
+
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      vector[outputElementNum].set(
+          ((DecimalColumnVector) inputColVector).vector[inputElementNum],
           precision, scale);
-      if (!vector[outElementNum].isSet()) {
-        isNull[outElementNum] = true;
+      if (!vector[outputElementNum].isSet()) {
+
+        // In effect, the input is NULL because of out-of-range precision/scale.
+        isNull[outputElementNum] = true;
         noNulls = false;
-      } else {
-        isNull[outElementNum] = false;
       }
     } else {
-      isNull[outElementNum] = true;
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
       noNulls = false;
     }
   }
@@ -98,23 +135,45 @@ public void stringifyValue(StringBuilder buffer, int row) {
     }
   }
 
+  /**
+   * Set a decimal from a HiveDecimalWritable.
+   *
+   * This is a FAST version that assumes the caller has checked to make sure the writable
+   * is not null and elementNum is correctly adjusted for isRepeating.
And, that the isNull entry + * has been set. + * + * We will check for precision/scale range, so the entry's NULL may get set. + * Otherwise, only the output entry fields will be set by this method. + * + * @param elementNum + * @param hiveDec + */ public void set(int elementNum, HiveDecimal hiveDec) { vector[elementNum].set(hiveDec, precision, scale); if (!vector[elementNum].isSet()) { noNulls = false; isNull[elementNum] = true; - } else { - isNull[elementNum] = false; } } @@ -149,4 +208,95 @@ public void shallowCopyTo(ColumnVector otherCv) { other.precision = precision; other.vector = vector; } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + @Override + public void copySelected( + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + DecimalColumnVector output = (DecimalColumnVector) outputColVector; + + // We do not need to do a column reset since we are carefully changing the output. + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.set(0, vector[0]); + } else { + output.isNull[0] = true; + output.noNulls = false; + output.vector[0].setFromLong(0); + } + output.isRepeating = true; + return; + } + + // Handle normal case + + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.set(i, vector[i]); + } + } else { + Arrays.fill(output.isNull, 0, size, false); + for(int i = 0; i < size; ++i) { + output.set(i, vector[i]); + } + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.set(i, vector[i]); + } + } else { + for(int i = 0; i < size; ++i) { + output.set(i, vector[i]); + } + } + } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + if (!isNull[i]) { + output.isNull[i] = false; + output.set(i, vector[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } else { + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + output.isNull[i] = false; + output.set(i, vector[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } + } + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index e04af01..f671dd3 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -54,52 +54,94 @@ public DoubleColumnVector(int len) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + DoubleColumnVector output = (DoubleColumnVector) outputColVector; - // Output has nulls if and only if input has nulls. 
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
index e04af01..f671dd3 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -54,52 +54,94 @@ public DoubleColumnVector(int len) {
 
   // Copy the current object contents into the output. Only copy selected entries,
   // as indicated by selectedInUse and the sel array.
+  @Override
   public void copySelected(
-      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+      boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) {
+
+    DoubleColumnVector output = (DoubleColumnVector) outputColVector;
 
-    // Output has nulls if and only if input has nulls.
-    output.noNulls = noNulls;
+    // We do not need to do a column reset since we are carefully changing the output.
     output.isRepeating = false;
 
     // Handle repeating case
     if (isRepeating) {
-      output.vector[0] = vector[0];
-      output.isNull[0] = isNull[0];
+      if (noNulls || !isNull[0]) {
+        output.isNull[0] = false;
+        output.vector[0] = vector[0];
+      } else {
+        output.isNull[0] = true;
+        output.noNulls = false;
+      }
       output.isRepeating = true;
       return;
     }
 
     // Handle normal case
 
-    // Copy data values over
-    if (selectedInUse) {
-      for (int j = 0; j < size; j++) {
-        int i = sel[j];
-        output.vector[i] = vector[i];
+    if (noNulls) {
+
+      // Carefully handle NULLs...
+      if (!output.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.isNull[i] = false;
+            output.vector[i] = vector[i];
+          }
+        } else {
+          Arrays.fill(output.isNull, 0, size, false);
+          for (int i = 0; i < size; ++i) {
+            output.vector[i] = vector[i];
+          }
+        }
+      } else {
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.vector[i] = vector[i];
+          }
+        } else {
+          for (int i = 0; i < size; ++i) {
+            output.vector[i] = vector[i];
+          }
+        }
       }
-    }
-    else {
-      System.arraycopy(vector, 0, output.vector, 0, size);
-    }
+    } else /* there are nulls in our column */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      output.noNulls = false;
 
-    // Copy nulls over if needed
-    if (!noNulls) {
       if (selectedInUse) {
         for (int j = 0; j < size; j++) {
           int i = sel[j];
           output.isNull[i] = isNull[i];
+          output.vector[i] = vector[i];
         }
-      }
-      else {
+      } else {
         System.arraycopy(isNull, 0, output.isNull, 0, size);
+        for (int i = 0; i < size; i++) {
+          output.vector[i] = vector[i];
+        }
       }
     }
   }
 
   // Fill the column vector with the provided value
   public void fill(double value) {
-    noNulls = true;
     isRepeating = true;
+    isNull[0] = false;
     vector[0] = value;
   }
 
@@ -132,17 +174,52 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
     flattenNoNulls(selectedInUse, sel, size);
   }
 
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * Both inputElementNum and outputElementNum must have been adjusted to 0 in ADVANCE
+   * when the input / output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+   * supports callers that do output NULL processing in advance, which may cause the output
+   * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls flag and isNull entry will be examined. The output will only
+   * be set if the input is NOT NULL, i.e. noNulls || !isNull[inputElementNum], where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL,
+   * i.e. output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+   * is DecimalColumnVector, which can find that the input decimal does not fit in the output
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, there is no need to mark
+   * the output entry as NOT NULL.)
+   */
   @Override
-  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
-    if (inputVector.isRepeating) {
-      inputElementNum = 0;
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+    }
+    if (inputColVector.isRepeating && inputElementNum != 0) {
+      throw new RuntimeException("Input column number expected to be 0 when isRepeating");
+    }
+
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
     }
-    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
-      isNull[outElementNum] = false;
-      vector[outElementNum] =
-          ((DoubleColumnVector) inputVector).vector[inputElementNum];
+
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      vector[outputElementNum] =
+          ((DoubleColumnVector) inputColVector).vector[inputElementNum];
     } else {
-      isNull[outElementNum] = true;
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
       noNulls = false;
     }
   }
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
index f813b1b..8a618ac 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
@@ -195,13 +195,55 @@ public int compareTo(IntervalDayTimeColumnVector intervalDayTimeColVector1, int
         asScratchIntervalDayTime(elementNum2));
   }
 
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * Both inputElementNum and outputElementNum must have been adjusted to 0 in ADVANCE
+   * when the input / output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+   * supports callers that do output NULL processing in advance, which may cause the output
+   * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls flag and isNull entry will be examined. The output will only
+   * be set if the input is NOT NULL, i.e. noNulls || !isNull[inputElementNum], where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL,
+   * i.e. output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+   * is DecimalColumnVector, which can find that the input decimal does not fit in the output
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, there is no need to mark
+   * the output entry as NOT NULL.)
+   */
   @Override
-  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
 
-    IntervalDayTimeColumnVector timestampColVector = (IntervalDayTimeColumnVector) inputVector;
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+    }
+    if (inputColVector.isRepeating && inputElementNum != 0) {
+      throw new RuntimeException("Input column number expected to be 0 when isRepeating");
+    }
+
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
+    }
 
-    totalSeconds[outElementNum] = timestampColVector.totalSeconds[inputElementNum];
-    nanos[outElementNum] = timestampColVector.nanos[inputElementNum];
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      IntervalDayTimeColumnVector timestampColVector = (IntervalDayTimeColumnVector) inputColVector;
+      totalSeconds[outputElementNum] = timestampColVector.totalSeconds[inputElementNum];
+      nanos[outputElementNum] = timestampColVector.nanos[inputElementNum];
+    } else {
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
+      noNulls = false;
+    }
   }
 
   // Simplify vector by brute-force flattening noNulls and isRepeating
@@ -229,8 +271,12 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
   }
 
   /**
-   * Set a row from a HiveIntervalDayTime.
-   * We assume the entry has already been isRepeated adjusted.
+   * Set a field from a HiveIntervalDayTime.
+   *
+   * This is a FAST version that assumes the caller has checked to make sure the intervalDayTime
+   * is not null and elementNum is correctly adjusted for isRepeating, and that the isNull entry
+   * has been set. Only the output entry fields will be set by this method.
+   *
    * @param elementNum
    * @param intervalDayTime
    */
@@ -240,7 +286,12 @@ public void set(int elementNum, HiveIntervalDayTime intervalDayTime) {
   }
 
   /**
-   * Set a row from the current value in the scratch interval day time.
+   * Set a field from the current value in the scratch interval day time.
+   *
+   * This is a FAST version that assumes the caller has checked to make sure the scratch interval
+   * day time is valid and elementNum is correctly adjusted for isRepeating, and that the isNull
+   * entry has been set. Only the output entry fields will be set by this method.
+   *
    * @param elementNum
    */
   public void setFromScratchIntervalDayTime(int elementNum) {
@@ -260,47 +311,87 @@ public void setNullValue(int elementNum) {
 
   // Copy the current object contents into the output. Only copy selected entries,
   // as indicated by selectedInUse and the sel array.
+  @Override
   public void copySelected(
-      boolean selectedInUse, int[] sel, int size, IntervalDayTimeColumnVector output) {
+      boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) {
+
+    IntervalDayTimeColumnVector output = (IntervalDayTimeColumnVector) outputColVector;
 
-    // Output has nulls if and only if input has nulls.
-    output.noNulls = noNulls;
+    // We do not need to do a column reset since we are carefully changing the output.
     output.isRepeating = false;
 
     // Handle repeating case
     if (isRepeating) {
-      output.totalSeconds[0] = totalSeconds[0];
-      output.nanos[0] = nanos[0];
-      output.isNull[0] = isNull[0];
+      if (noNulls || !isNull[0]) {
+        output.isNull[0] = false;
+        output.totalSeconds[0] = totalSeconds[0];
+        output.nanos[0] = nanos[0];
+      } else {
+        output.isNull[0] = true;
+        output.noNulls = false;
+      }
       output.isRepeating = true;
       return;
     }
 
     // Handle normal case
 
-    // Copy data values over
-    if (selectedInUse) {
-      for (int j = 0; j < size; j++) {
-        int i = sel[j];
-        output.totalSeconds[i] = totalSeconds[i];
-        output.nanos[i] = nanos[i];
+    if (noNulls) {
+
+      // Carefully handle NULLs...
+      if (!output.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.isNull[i] = false;
+            output.totalSeconds[i] = totalSeconds[i];
+            output.nanos[i] = nanos[i];
+          }
+        } else {
+          Arrays.fill(output.isNull, 0, size, false);
+          System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size);
+          System.arraycopy(nanos, 0, output.nanos, 0, size);
+        }
+      } else {
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.totalSeconds[i] = totalSeconds[i];
+            output.nanos[i] = nanos[i];
+          }
+        } else {
+          System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size);
+          System.arraycopy(nanos, 0, output.nanos, 0, size);
+        }
       }
-    }
-    else {
-      System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size);
-      System.arraycopy(nanos, 0, output.nanos, 0, size);
-    }
+    } else /* there are nulls in our column */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      output.noNulls = false;
 
-    // Copy nulls over if needed
-    if (!noNulls) {
       if (selectedInUse) {
         for (int j = 0; j < size; j++) {
           int i = sel[j];
           output.isNull[i] = isNull[i];
+          output.totalSeconds[i] = totalSeconds[i];
+          output.nanos[i] = nanos[i];
         }
-      }
-      else {
+      } else {
         System.arraycopy(isNull, 0, output.isNull, 0, size);
+        System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size);
+        System.arraycopy(nanos, 0, output.nanos, 0, size);
       }
     }
   }
@@ -310,8 +401,8 @@ public void copySelected(
    * @param intervalDayTime
    */
   public void fill(HiveIntervalDayTime intervalDayTime) {
-    noNulls = true;
     isRepeating = true;
+    isNull[0] = false;
    totalSeconds[0] = intervalDayTime.getTotalSeconds();
     nanos[0] = intervalDayTime.getNanos();
   }
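The new invariant checks mean callers, not setElement itself, now adjust indices to 0 for repeating vectors. A hypothetical helper showing the expected call pattern (SetElementHelper and copyEntry are names invented for illustration, not part of the patch):

    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

    public final class SetElementHelper {
      private SetElementHelper() {}

      // Adjusts indices to 0 for repeating vectors, as the new invariant
      // checks in setElement require, and pre-clears the output NULL flag.
      public static void copyEntry(
          ColumnVector out, int outIndex, ColumnVector in, int inIndex) {
        final int o = out.isRepeating ? 0 : outIndex;
        final int i = in.isRepeating ? 0 : inIndex;
        out.isNull[o] = false;      // must be set in ADVANCE
        out.setElement(o, i, in);   // may re-mark NULL if the input is NULL
      }
    }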
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
index 7ecb1e0..dd71f4d 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
@@ -49,28 +49,65 @@ protected void childFlatten(boolean useSelected, int[] selected, int size) {
     child.flatten(useSelected, selected, size);
   }
 
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * Both inputElementNum and outputElementNum must have been adjusted to 0 in ADVANCE
+   * when the input / output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+   * supports callers that do output NULL processing in advance, which may cause the output
+   * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls flag and isNull entry will be examined. The output will only
+   * be set if the input is NOT NULL, i.e. noNulls || !isNull[inputElementNum], where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL,
+   * i.e. output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+   * is DecimalColumnVector, which can find that the input decimal does not fit in the output
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, there is no need to mark
+   * the output entry as NOT NULL.)
+   */
   @Override
-  public void setElement(int outElementNum, int inputElementNum,
-      ColumnVector inputVector) {
-    ListColumnVector input = (ListColumnVector) inputVector;
-    if (input.isRepeating) {
-      inputElementNum = 0;
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
     }
-    if (!input.noNulls && input.isNull[inputElementNum]) {
-      isNull[outElementNum] = true;
-      noNulls = false;
-    } else {
-      isNull[outElementNum] = false;
+    if (inputColVector.isRepeating && inputElementNum != 0) {
+      throw new RuntimeException("Input column number expected to be 0 when isRepeating");
+    }
+
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
+    }
+
+    // CONCERN: isRepeating
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      ListColumnVector input = (ListColumnVector) inputColVector;
       int offset = childCount;
       int length = (int) input.lengths[inputElementNum];
       int inputOffset = (int) input.offsets[inputElementNum];
-      offsets[outElementNum] = offset;
+      offsets[outputElementNum] = offset;
       childCount += length;
-      lengths[outElementNum] = length;
+      lengths[outputElementNum] = length;
       child.ensureSize(childCount, true);
       for (int i = 0; i < length; ++i) {
-        child.setElement(i + offset, inputOffset + i, input.child);
+        final int outputIndex = i + offset;
+        child.isNull[outputIndex] = false;
+        child.setElement(outputIndex, inputOffset + i, input.child);
       }
+    } else {
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
+      noNulls = false;
     }
   }
 
@@ -116,4 +153,10 @@ public void unFlatten() {
     }
   }
 
+  @Override
+  public void copySelected(boolean selectedInUse, int[] sel, int size,
+      ColumnVector outputColVector) {
+    throw new RuntimeException("Not supported");
+  }
+
 }
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
index 49e9184..18f31c1 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -54,44 +54,80 @@ public LongColumnVector(int len) {
 
   // Copy the current object contents into the output. Only copy selected entries,
   // as indicated by selectedInUse and the sel array.
+  @Override
   public void copySelected(
-      boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
+      boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) {
+
+    LongColumnVector output = (LongColumnVector) outputColVector;
 
-    // Output has nulls if and only if input has nulls.
-    output.noNulls = noNulls;
+    // We do not need to do a column reset since we are carefully changing the output.
     output.isRepeating = false;
 
     // Handle repeating case
     if (isRepeating) {
-      output.vector[0] = vector[0];
-      output.isNull[0] = isNull[0];
+      if (noNulls || !isNull[0]) {
+        output.isNull[0] = false;
+        output.vector[0] = vector[0];
+      } else {
+        output.isNull[0] = true;
+        output.noNulls = false;
+      }
       output.isRepeating = true;
       return;
     }
 
     // Handle normal case
 
-    // Copy data values over
-    if (selectedInUse) {
-      for (int j = 0; j < size; j++) {
-        int i = sel[j];
-        output.vector[i] = vector[i];
+    if (noNulls) {
+
+      // Carefully handle NULLs...
+      if (!output.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.isNull[i] = false;
+            output.vector[i] = vector[i];
+          }
+        } else {
+          Arrays.fill(output.isNull, 0, size, false);
+          System.arraycopy(vector, 0, output.vector, 0, size);
+        }
+      } else {
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.vector[i] = vector[i];
+          }
+        } else {
+          System.arraycopy(vector, 0, output.vector, 0, size);
+        }
       }
-    }
-    else {
-      System.arraycopy(vector, 0, output.vector, 0, size);
-    }
+    } else /* there are nulls in our column */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      output.noNulls = false;
 
-    // Copy nulls over if needed
-    if (!noNulls) {
       if (selectedInUse) {
         for (int j = 0; j < size; j++) {
           int i = sel[j];
           output.isNull[i] = isNull[i];
+          output.vector[i] = vector[i];
         }
-      }
-      else {
+      } else {
         System.arraycopy(isNull, 0, output.isNull, 0, size);
+        System.arraycopy(vector, 0, output.vector, 0, size);
       }
     }
   }
@@ -101,51 +137,82 @@ public void copySelected(
   public void copySelected(
       boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
 
-    // Output has nulls if and only if input has nulls.
-    output.noNulls = noNulls;
+    // We do not need to do a column reset since we are carefully changing the output.
     output.isRepeating = false;
 
     // Handle repeating case
     if (isRepeating) {
-      output.vector[0] = vector[0];  // automatic conversion to double is done here
-      output.isNull[0] = isNull[0];
+      if (noNulls || !isNull[0]) {
+        output.isNull[0] = false;
+        output.vector[0] = vector[0];  // automatic conversion to double is done here
+      } else {
+        output.isNull[0] = true;
+        output.noNulls = false;
+      }
       output.isRepeating = true;
       return;
     }
 
     // Handle normal case
 
-    // Copy data values over
-    if (selectedInUse) {
-      for (int j = 0; j < size; j++) {
-        int i = sel[j];
-        output.vector[i] = vector[i];
-      }
-    }
-    else {
-      for (int i = 0; i < size; ++i) {
-        output.vector[i] = vector[i];
+    if (noNulls) {
+
+      // Carefully handle NULLs...
+      if (!output.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.isNull[i] = false;
+            output.vector[i] = vector[i];
+          }
+        } else {
+          Arrays.fill(output.isNull, 0, size, false);
+          for (int i = 0; i < size; ++i) {
+            output.vector[i] = vector[i];
+          }
+        }
+      } else {
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.vector[i] = vector[i];
+          }
+        } else {
+          for (int i = 0; i < size; ++i) {
+            output.vector[i] = vector[i];
+          }
+        }
       }
-    }
+    } else /* there are nulls in our column */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      output.noNulls = false;
 
-    // Copy nulls over if needed
-    if (!noNulls) {
       if (selectedInUse) {
         for (int j = 0; j < size; j++) {
           int i = sel[j];
           output.isNull[i] = isNull[i];
+          output.vector[i] = vector[i];
         }
-      }
-      else {
+      } else {
         System.arraycopy(isNull, 0, output.isNull, 0, size);
+        for (int i = 0; i < size; i++) {
+          output.vector[i] = vector[i];
+        }
       }
     }
   }
 
   // Fill the column vector with the provided value
   public void fill(long value) {
-    noNulls = true;
     isRepeating = true;
+    isNull[0] = false;
     vector[0] = value;
   }
 
@@ -178,17 +245,52 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
     flattenNoNulls(selectedInUse, sel, size);
   }
 
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * Both inputElementNum and outputElementNum must have been adjusted to 0 in ADVANCE
+   * when the input / output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+   * supports callers that do output NULL processing in advance, which may cause the output
+   * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls flag and isNull entry will be examined. The output will only
+   * be set if the input is NOT NULL, i.e. noNulls || !isNull[inputElementNum], where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL,
+   * i.e. output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+   * is DecimalColumnVector, which can find that the input decimal does not fit in the output
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, there is no need to mark
+   * the output entry as NOT NULL.)
+   */
   @Override
-  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
-    if (inputVector.isRepeating) {
-      inputElementNum = 0;
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
    }
-    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
-      isNull[outElementNum] = false;
-      vector[outElementNum] =
-          ((LongColumnVector) inputVector).vector[inputElementNum];
+    if (inputColVector.isRepeating && inputElementNum != 0) {
+      throw new RuntimeException("Input column number expected to be 0 when isRepeating");
+    }
+
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
+    }
+
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      vector[outputElementNum] =
+          ((LongColumnVector) inputColVector).vector[inputElementNum];
     } else {
-      isNull[outElementNum] = true;
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
       noNulls = false;
     }
   }
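The comment about keeping conditional statements out of the for loop is the heart of the NULL-handling pattern used above: copy every data value unconditionally, even for NULL entries, and copy the NULL flags separately. A standalone sketch of that pattern on plain arrays (illustrative only, not patch code):

    public class BranchFreeCopy {
      static void copyWithNulls(long[] srcVec, boolean[] srcNull,
                                long[] dstVec, boolean[] dstNull, int size) {
        // NULL entries' data values are copied too; they are simply never
        // read back, so no per-element branch is needed in the hot loop.
        System.arraycopy(srcVec, 0, dstVec, 0, size);
        System.arraycopy(srcNull, 0, dstNull, 0, size);
      }

      public static void main(String[] args) {
        long[] src = {1, 2, 3};
        boolean[] srcNull = {false, true, false};
        long[] dst = new long[3];
        boolean[] dstNull = new boolean[3];
        copyWithNulls(src, srcNull, dst, dstNull, 3);
        System.out.println(dst[1] + " null=" + dstNull[1]);  // 2 null=true
      }
    }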
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
index 078c9c1..859f52c 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
@@ -53,30 +53,70 @@ protected void childFlatten(boolean useSelected, int[] selected, int size) {
     values.flatten(useSelected, selected, size);
   }
 
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * Both inputElementNum and outputElementNum must have been adjusted to 0 in ADVANCE
+   * when the input / output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+   * supports callers that do output NULL processing in advance, which may cause the output
+   * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls flag and isNull entry will be examined. The output will only
+   * be set if the input is NOT NULL, i.e. noNulls || !isNull[inputElementNum], where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL,
+   * i.e. output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+   * is DecimalColumnVector, which can find that the input decimal does not fit in the output
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, there is no need to mark
+   * the output entry as NOT NULL.)
+   */
   @Override
-  public void setElement(int outElementNum, int inputElementNum,
-      ColumnVector inputVector) {
-    if (inputVector.isRepeating) {
-      inputElementNum = 0;
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
     }
-    if (!inputVector.noNulls && inputVector.isNull[inputElementNum]) {
-      isNull[outElementNum] = true;
-      noNulls = false;
-    } else {
-      MapColumnVector input = (MapColumnVector) inputVector;
-      isNull[outElementNum] = false;
+    if (inputColVector.isRepeating && inputElementNum != 0) {
+      throw new RuntimeException("Input column number expected to be 0 when isRepeating");
+    }
+
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
+    }
+
+    // CONCERN: isRepeating
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      MapColumnVector input = (MapColumnVector) inputColVector;
+      isNull[outputElementNum] = false;
       int offset = childCount;
       int length = (int) input.lengths[inputElementNum];
       int inputOffset = (int) input.offsets[inputElementNum];
-      offsets[outElementNum] = offset;
+      offsets[outputElementNum] = offset;
       childCount += length;
-      lengths[outElementNum] = length;
+      lengths[outputElementNum] = length;
       keys.ensureSize(childCount, true);
       values.ensureSize(childCount, true);
       for (int i = 0; i < length; ++i) {
-        keys.setElement(i + offset, inputOffset + i, input.keys);
-        values.setElement(i + offset, inputOffset + i, input.values);
+        final int inputIndex = inputOffset + i;
+        final int outputIndex = i + offset;
+        keys.isNull[outputIndex] = false;
+        keys.setElement(outputIndex, inputIndex, input.keys);
+        values.isNull[outputIndex] = false;
+        values.setElement(outputIndex, inputIndex, input.values);
       }
+    } else {
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
+      noNulls = false;
     }
   }
 
@@ -128,4 +168,10 @@ public void unFlatten() {
       values.unFlatten();
     }
   }
+
+  @Override
+  public void copySelected(boolean selectedInUse, int[] sel, int size,
+      ColumnVector outputColVector) {
+    throw new RuntimeException("Not supported");
+  }
 }
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
index b65c802..f646f81 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
@@ -53,21 +53,57 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
     flattenNoNulls(selectedInUse, sel, size);
   }
 
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * Both inputElementNum and outputElementNum must have been adjusted to 0 in ADVANCE
+   * when the input / output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+   * supports callers that do output NULL processing in advance, which may cause the output
+   * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls flag and isNull entry will be examined. The output will only
+   * be set if the input is NOT NULL, i.e. noNulls || !isNull[inputElementNum], where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL,
+   * i.e. output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+   * is DecimalColumnVector, which can find that the input decimal does not fit in the output
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, there is no need to mark
+   * the output entry as NOT NULL.)
+   */
   @Override
-  public void setElement(int outElementNum, int inputElementNum,
-      ColumnVector inputVector) {
-    if (inputVector.isRepeating) {
-      inputElementNum = 0;
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
     }
-    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
-      isNull[outElementNum] = false;
-      ColumnVector[] inputFields = ((StructColumnVector) inputVector).fields;
+    if (inputColVector.isRepeating && inputElementNum != 0) {
+      throw new RuntimeException("Input column number expected to be 0 when isRepeating");
+    }
+
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
+    }
+
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      ColumnVector[] inputFields = ((StructColumnVector) inputColVector).fields;
       for (int i = 0; i < inputFields.length; ++i) {
-        fields[i].setElement(outElementNum, inputElementNum, inputFields[i]);
+        ColumnVector outputField = fields[i];
+        outputField.isNull[outputElementNum] = false;
+        outputField.setElement(outputElementNum, inputElementNum, inputFields[i]);
       }
     } else {
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
       noNulls = false;
-      isNull[outElementNum] = true;
     }
   }
 
@@ -134,4 +170,10 @@ public void setRepeating(boolean isRepeating) {
   public void shallowCopyTo(ColumnVector otherCv) {
     throw new UnsupportedOperationException(); // Implement if needed.
   }
+
+  @Override
+  public void copySelected(boolean selectedInUse, int[] sel, int size,
+      ColumnVector outputColVector) {
+    throw new RuntimeException("Not supported");
+  }
 }
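StructColumnVector.setElement now pre-clears each child's isNull entry before delegating, matching the set-in-advance contract the children expect. A small usage sketch (a struct of two longs; names and values are illustrative):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;

    public class StructSetElementDemo {
      public static void main(String[] args) {
        LongColumnVector a = new LongColumnVector(4);
        LongColumnVector b = new LongColumnVector(4);
        StructColumnVector in = new StructColumnVector(4, a, b);

        a.vector[1] = 10;
        b.vector[1] = 20;

        StructColumnVector out = new StructColumnVector(
            4, new LongColumnVector(4), new LongColumnVector(4));

        // Pre-clear the output NULL entry, then copy input row 1 to output row 0;
        // the struct clears each child's isNull entry before delegating.
        out.isNull[0] = false;
        out.setElement(0, 1, in);

        System.out.println(((LongColumnVector) out.fields[0]).vector[0]);  // 10
        System.out.println(((LongColumnVector) out.fields[1]).vector[0]);  // 20
      }
    }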
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
index 0e7f86f..c191cb5 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -241,13 +241,55 @@ public int compareTo(TimestampColumnVector timestampColVector1, int elementNum1,
         asScratchTimestamp(elementNum2));
   }
 
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * Both inputElementNum and outputElementNum must have been adjusted to 0 in ADVANCE
+   * when the input / output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+   * supports callers that do output NULL processing in advance, which may cause the output
+   * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls flag and isNull entry will be examined. The output will only
+   * be set if the input is NOT NULL, i.e. noNulls || !isNull[inputElementNum], where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL,
+   * i.e. output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+   * is DecimalColumnVector, which can find that the input decimal does not fit in the output
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, there is no need to mark
+   * the output entry as NOT NULL.)
+   */
   @Override
-  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+    }
+    if (inputColVector.isRepeating && inputElementNum != 0) {
+      throw new RuntimeException("Input column number expected to be 0 when isRepeating");
+    }
 
-    TimestampColumnVector timestampColVector = (TimestampColumnVector) inputVector;
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
+    }
 
-    time[outElementNum] = timestampColVector.time[inputElementNum];
-    nanos[outElementNum] = timestampColVector.nanos[inputElementNum];
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVector;
+      time[outputElementNum] = timestampColVector.time[inputElementNum];
+      nanos[outputElementNum] = timestampColVector.nanos[inputElementNum];
+    } else {
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
+      noNulls = false;
+    }
   }
 
   // Simplify vector by brute-force flattening noNulls and isRepeating
@@ -275,23 +317,27 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
   }
 
   /**
-   * Set a row from a timestamp.
-   * We assume the entry has already been isRepeated adjusted.
+   * Set a field from a Timestamp.
+   *
+   * This is a FAST version that assumes the caller has checked to make sure the timestamp
+   * is not null and elementNum is correctly adjusted for isRepeating, and that the isNull entry
+   * has been set. Only the output entry fields will be set by this method.
+   *
    * @param elementNum
    * @param timestamp
    */
   public void set(int elementNum, Timestamp timestamp) {
-    if (timestamp == null) {
-      this.noNulls = false;
-      this.isNull[elementNum] = true;
-    } else {
-      this.time[elementNum] = timestamp.getTime();
-      this.nanos[elementNum] = timestamp.getNanos();
-    }
+    this.time[elementNum] = timestamp.getTime();
+    this.nanos[elementNum] = timestamp.getNanos();
   }
 
   /**
-   * Set a row from the current value in the scratch timestamp.
+   * Set a field from the current value in the scratch timestamp.
+   *
+   * This is a FAST version that assumes the caller has checked to make sure the current value in
+   * the scratch timestamp is valid and elementNum is correctly adjusted for isRepeating, and
+   * that the isNull entry has been set. Only the output entry fields will be set by this method.
+   *
    * @param elementNum
    */
   public void setFromScratchTimestamp(int elementNum) {
@@ -311,47 +357,87 @@ public void setNullValue(int elementNum) {
 
   // Copy the current object contents into the output. Only copy selected entries,
   // as indicated by selectedInUse and the sel array.
+  @Override
   public void copySelected(
-      boolean selectedInUse, int[] sel, int size, TimestampColumnVector output) {
+      boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) {
+
+    TimestampColumnVector output = (TimestampColumnVector) outputColVector;
 
-    // Output has nulls if and only if input has nulls.
-    output.noNulls = noNulls;
+    // We do not need to do a column reset since we are carefully changing the output.
     output.isRepeating = false;
 
     // Handle repeating case
     if (isRepeating) {
-      output.time[0] = time[0];
-      output.nanos[0] = nanos[0];
-      output.isNull[0] = isNull[0];
+      if (noNulls || !isNull[0]) {
+        output.isNull[0] = false;
+        output.time[0] = time[0];
+        output.nanos[0] = nanos[0];
+      } else {
+        output.isNull[0] = true;
+        output.noNulls = false;
+      }
       output.isRepeating = true;
       return;
     }
 
     // Handle normal case
 
-    // Copy data values over
-    if (selectedInUse) {
-      for (int j = 0; j < size; j++) {
-        int i = sel[j];
-        output.time[i] = time[i];
-        output.nanos[i] = nanos[i];
+    if (noNulls) {
+
+      // Carefully handle NULLs...
+      if (!output.noNulls) {
+
+        /*
+         * Make sure our output results have their isNull entry initialized to false.
+         * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all
+         * the isNull entries.
+         */
+
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.isNull[i] = false;
+            output.time[i] = time[i];
+            output.nanos[i] = nanos[i];
+          }
+        } else {
+          Arrays.fill(output.isNull, 0, size, false);
+          System.arraycopy(time, 0, output.time, 0, size);
+          System.arraycopy(nanos, 0, output.nanos, 0, size);
+        }
+      } else {
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            output.time[i] = time[i];
+            output.nanos[i] = nanos[i];
+          }
+        } else {
+          System.arraycopy(time, 0, output.time, 0, size);
+          System.arraycopy(nanos, 0, output.nanos, 0, size);
+        }
       }
-    }
-    else {
-      System.arraycopy(time, 0, output.time, 0, size);
-      System.arraycopy(nanos, 0, output.nanos, 0, size);
-    }
+    } else /* there are nulls in our column */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      output.noNulls = false;
 
-    // Copy nulls over if needed
-    if (!noNulls) {
       if (selectedInUse) {
         for (int j = 0; j < size; j++) {
           int i = sel[j];
           output.isNull[i] = isNull[i];
+          output.time[i] = time[i];
+          output.nanos[i] = nanos[i];
         }
-      }
-      else {
+      } else {
         System.arraycopy(isNull, 0, output.isNull, 0, size);
+        System.arraycopy(time, 0, output.time, 0, size);
+        System.arraycopy(nanos, 0, output.nanos, 0, size);
       }
     }
   }
@@ -361,8 +447,8 @@ public void copySelected(
    * @param timestamp
    */
   public void fill(Timestamp timestamp) {
-    noNulls = true;
     isRepeating = true;
+    isNull[0] = false;
     time[0] = timestamp.getTime();
     nanos[0] = timestamp.getNanos();
   }
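TimestampColumnVector.set is now a FAST path: the null check that used to live inside it moves to the caller. A sketch of the caller pattern the new javadoc implies (setSafely is an invented name, not patch code):

    import java.sql.Timestamp;
    import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

    public class TimestampSetDemo {
      static void setSafely(TimestampColumnVector col, int i, Timestamp ts) {
        // The null check and isNull bookkeeping are now the caller's job.
        if (ts == null) {
          col.noNulls = false;
          col.isNull[i] = true;
        } else {
          col.isNull[i] = false;
          col.set(i, ts);
        }
      }

      public static void main(String[] args) {
        TimestampColumnVector col = new TimestampColumnVector(4);
        setSafely(col, 0, Timestamp.valueOf("2018-02-06 12:34:56"));
        setSafely(col, 1, null);
        System.out.println(col.asScratchTimestamp(0) + " / isNull[1]=" + col.isNull[1]);
      }
    }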
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
index 448461b..a999c04 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
@@ -55,21 +55,58 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
     flattenNoNulls(selectedInUse, sel, size);
   }
 
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * Both inputElementNum and outputElementNum must have been adjusted to 0 in ADVANCE
+   * when the input / output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+   * supports callers that do output NULL processing in advance, which may cause the output
+   * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls flag and isNull entry will be examined. The output will only
+   * be set if the input is NOT NULL, i.e. noNulls || !isNull[inputElementNum], where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL,
+   * i.e. output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+   * is DecimalColumnVector, which can find that the input decimal does not fit in the output
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, there is no need to mark
+   * the output entry as NOT NULL.)
+   */
   @Override
-  public void setElement(int outElementNum, int inputElementNum,
-      ColumnVector inputVector) {
-    if (inputVector.isRepeating) {
-      inputElementNum = 0;
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
     }
-    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
-      isNull[outElementNum] = false;
-      UnionColumnVector input = (UnionColumnVector) inputVector;
-      tags[outElementNum] = input.tags[inputElementNum];
-      fields[tags[outElementNum]].setElement(outElementNum, inputElementNum,
-          input.fields[tags[outElementNum]]);
+    if (inputColVector.isRepeating && inputElementNum != 0) {
+      throw new RuntimeException("Input column number expected to be 0 when isRepeating");
+    }
+
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
+    }
+
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      UnionColumnVector input = (UnionColumnVector) inputColVector;
+      final int tag = input.tags[inputElementNum];
+      tags[outputElementNum] = tag;
+      ColumnVector outputField = fields[tag];
+      outputField.isNull[outputElementNum] = false;
+      outputField.setElement(outputElementNum, inputElementNum,
+          input.fields[tag]);
     } else {
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
       noNulls = false;
-      isNull[outElementNum] = true;
     }
   }
 
@@ -142,4 +179,10 @@ public void setRepeating(boolean isRepeating) {
   public void shallowCopyTo(ColumnVector otherCv) {
     throw new UnsupportedOperationException(); // Implement if needed.
   }
+
+  @Override
+  public void copySelected(boolean selectedInUse, int[] sel, int size,
+      ColumnVector outputColVector) {
+    throw new RuntimeException("Not supported");
+  }
 }
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
index 42c7e8f..ea13c24 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -136,6 +136,50 @@ private static String toUTF8(Object o) {
     return o.toString();
   }
 
+  public String stringifyColumn(int columnNum) {
+    if (size == 0) {
+      return "";
+    }
+    StringBuilder b = new StringBuilder();
+    b.append("columnNum ");
+    b.append(columnNum);
+    b.append(", size ");
+    b.append(size);
+    b.append(", selectedInUse ");
+    b.append(selectedInUse);
+    ColumnVector colVector = cols[columnNum];
+    b.append(", noNulls ");
+    b.append(colVector.noNulls);
+    b.append(", isRepeating ");
+    b.append(colVector.isRepeating);
+    b.append('\n');
+
+    final boolean noNulls = colVector.noNulls;
+    final boolean[] isNull = colVector.isNull;
+    if (colVector.isRepeating) {
+      final boolean hasRepeatedValue = (noNulls || !isNull[0]);
+      for (int i = 0; i < size; i++) {
+        if (hasRepeatedValue) {
+          colVector.stringifyValue(b, 0);
+        } else {
+          b.append("NULL");
+        }
+        b.append('\n');
+      }
+    } else {
+      for (int i = 0; i < size; i++) {
+        final int batchIndex = (selectedInUse ? selected[i] : i);
+        if (noNulls || !isNull[batchIndex]) {
+          colVector.stringifyValue(b, batchIndex);
+        } else {
+          b.append("NULL");
+        }
+        b.append('\n');
+      }
+    }
+    return b.toString();
+  }
+
   @Override
   public String toString() {
     if (size == 0) {
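stringifyColumn gives a one-value-per-line dump of a single column, which is handy when debugging NULL propagation through these vectorized expressions. A minimal usage sketch (values are illustrative):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class StringifyColumnDemo {
      public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        LongColumnVector col = new LongColumnVector();
        batch.cols[0] = col;

        batch.size = 3;
        col.vector[0] = 7;
        col.vector[1] = 8;
        col.noNulls = false;
        col.isNull[2] = true;

        // Prints a header line, then one value (or NULL) per row,
        // honoring selectedInUse and isRepeating.
        System.out.print(batch.stringifyColumn(0));
      }
    }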
diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
index 657ea34..669dc7f 100644
--- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
+++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
@@ -3008,6 +3008,11 @@ private void generateDTIScalarCompareColumn(String[] tdesc) throws Exception {
     templateString = templateString.replaceAll("<ClassName>", className);
     templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
     templateString = templateString.replaceAll("<OperandType>", operandType);
+
+    String vectorExpressionParametersBody = getDTIScalarColumnDisplayBody(operandType);
+    templateString = templateString.replaceAll(
+        "<VectorExpressionParameters>", vectorExpressionParametersBody);
+
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);
   }
@@ -3028,6 +3033,30 @@ private void generateFilterDTIScalarCompareColumn(String[] tdesc) throws Exception {
         className, templateString);
   }
 
+  private String getDTIScalarColumnDisplayBody(String type) {
+    if (type.equals("date")) {
+      return
+          "Date dt = new Date(0);"
+          + " dt.setTime(DateWritable.daysToMillis((int) value));\n"
+          + " return \"date \" + dt.toString() + \", \" + getColumnParamString(0, colNum);";
+    } else {
+      return
+          " return super.vectorExpressionParameters();";
+    }
+  }
+
+  private String getDTIColumnScalarDisplayBody(String type) {
+    if (type.equals("date")) {
+      return
+          "Date dt = new Date(0);"
+          + " dt.setTime(DateWritable.daysToMillis((int) value));\n"
+          + " return getColumnParamString(0, colNum) + \", date \" + dt.toString();";
+    } else {
+      return
+          " return super.vectorExpressionParameters();";
+    }
+  }
+
   private void generateDTIColumnCompareScalar(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
     String operandType = tdesc[2];
@@ -3040,6 +3069,11 @@ private void generateDTIColumnCompareScalar(String[] tdesc) throws Exception {
     templateString = templateString.replaceAll("<ClassName>", className);
     templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
     templateString = templateString.replaceAll("<OperandType>", operandType);
+
+    String vectorExpressionParametersBody = getDTIColumnScalarDisplayBody(operandType);
+    templateString = templateString.replaceAll(
+        "<VectorExpressionParameters>", vectorExpressionParametersBody);
+
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);
   }
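For context, the generator expands templates with plain String.replaceAll over angle-bracket tokens. The token names in the hunks above were reconstructed from the variable names and the <ClassName>-style convention visible in the templates earlier in this patch; <VectorExpressionParameters> in particular is an assumption, not confirmed from the template files. A self-contained sketch of the expansion step:

    // Illustrative only: mirrors the generator's replaceAll-based expansion.
    // Token and class names here are examples, not the generator's actual data.
    public class TemplateExpandDemo {
      public static void main(String[] args) {
        String templateString =
            "public class <ClassName> extends <BaseClassName> {\n"
            + "  // operand type: <OperandType>\n"
            + "  public String vectorExpressionParameters() {\n"
            + "    <VectorExpressionParameters>\n"
            + "  }\n"
            + "}\n";

        // The tokens contain no regex metacharacters, so replaceAll is safe here.
        templateString = templateString.replaceAll("<ClassName>", "DateScalarEqualDateColumn");
        templateString = templateString.replaceAll("<BaseClassName>", "LongScalarEqualLongColumn");
        templateString = templateString.replaceAll("<OperandType>", "date");
        templateString = templateString.replaceAll(
            "<VectorExpressionParameters>", "return super.vectorExpressionParameters();");

        System.out.print(templateString);
      }
    }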