diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 942f97a..a2832de 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -61,7 +61,8 @@ minitez.query.files=acid_vectorization_original_tez.q,\ hybridgrace_hashjoin_2.q,\ multi_count_distinct.q,\ tez-tag.q,\ - tez_union_with_udf.q + tez_union_with_udf.q,\ + vectorization_div0.q minillap.shared.query.files=insert_into1.q,\ @@ -732,18 +733,25 @@ minillaplocal.query.files=\ vector_groupby_grouping_sets_limit.q,\ vector_groupby_grouping_window.q,\ vector_groupby_rollup1.q,\ + vector_if_expr_2.q,\ vector_join30.q,\ vector_join_filters.q,\ vector_leftsemi_mapjoin.q,\ + vector_like_2.q,\ vector_llap_text_1.q,\ vector_mapjoin_reduce.q,\ vector_number_compare_projection.q,\ + vector_order_null.q,\ + vector_outer_reference_windowed.q,\ vector_partitioned_date_time.q,\ vector_ptf_1.q,\ vector_ptf_part_simple.q,\ vector_reuse_scratchcols.q,\ + vector_string_decimal.q,\ vector_udf_adaptor_1.q,\ + vector_udf_string_to_boolean.q,\ vector_udf1.q,\ + vector_udf2.q,\ vector_windowing.q,\ vector_windowing_expressions.q,\ vector_windowing_gby.q,\ @@ -756,6 +764,7 @@ minillaplocal.query.files=\ vector_windowing_streaming.q,\ vector_windowing_windowspec.q,\ vector_windowing_windowspec4.q,\ + vectorization_div0.q,\ vectorization_input_format_excludes.q,\ vectorized_insert_into_bucketed_table.q,\ vectorization_short_regress.q,\ diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java index 9d33656..36810d9 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java @@ -158,9 +158,44 @@ protected void decodeBatch(OrcEncodedColumnBatch batch, VectorizedRowBatch.DEFAULT_SIZE); } trace.logTreeReaderNextVector(idx); + + /* + * Currently, ORC's TreeReaderFactory class does this: + * + * public void nextBatch(VectorizedRowBatch batch, + * int batchSize) throws IOException { + * batch.cols[0].reset(); + * batch.cols[0].ensureSize(batchSize, false); + * nextVector(batch.cols[0], null, batchSize); + * } + * + * CONCERN: + * For better performance, we'd like to *not* do a ColumnVector.reset() + * which zeroes out isNull. Why? Because there are common cases where + * ORC will *immediately* copy its null flags into the isNull array. This is a + * waste. + * + * For correctness, we must do it for now. + * + * The best solution is for ORC to manage the noNulls and isNull array itself + * because it knows what NULLs the next set of rows contains. + * + * Its management of the fields of ColumnVector is a little different from what we + * must do for vector expressions. For those, we must maintain the invariant that if + * noNulls is true, there are no NULLs in any part of the isNull array. This is + * because the next vector expression relies on the invariant. + * + * Given that ORC (or any other producer) is providing *read-only* batches to the + * consumer, what is important is that the isNull array through batch.size has + * integrity with the noNulls flag. So, if ORC is giving us 100 rows (for example) + * and none of them are NULL, it can make sure the first 100 isNull + * entries are false and then safely set noNulls to true.
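+ * + * For example, a minimal sketch (illustrative only, not actual ORC code) for a + * producer that knows its next 100 rows contain no NULLs: + * + * Arrays.fill(cv.isNull, 0, 100, false); + * cv.noNulls = true; + *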
Any other NULLs (true entries) + * in isNull are irrelevant because ORC owns the batch. It just needs to make sure + * it doesn't get confused. + * + */ ColumnVector cv = cvb.cols[idx]; - cv.noNulls = true; - cv.reset(); // Reset to work around some poor assumptions in ORC. + cv.reset(); cv.ensureSize(batchSize, false); reader.nextVector(cv, null, batchSize); } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt index 64c4e01..cb29953 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,31 +67,23 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop.
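+ * (The data entries computed for NULL rows are afterwards set to harmless values, + * e.g. 1 for long and NaN for double, to prevent later zero-divide errors; see + * the similar note in ColumnCompareColumn.txt below.)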
- */ - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputVector[0] = vector1[0] vector2[0]; } else if (inputColVector1.isRepeating) { final vector1Value = vector1[0]; diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt index ae0d348..d7c7fb4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,33 +68,13 @@ public class extends VectorExpression { DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - if (inputColVector1.noNulls && inputColVector2.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, - inputColVector1.isRepeating && inputColVector2.isRepeating, - batch.selectedInUse, sel, n); - } - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt index e7c2385..00b8d96 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt @@ -15,10 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; @@ -54,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -63,45 +71,82 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] value; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector.isRepeating) { - outputVector[0] = vector[0] value; - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] value; - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] value; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = vector[i] value; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = vector[i] value; } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
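+ * Instead, noNulls is cleared once up front, isNull is copied over wholesale, and + * the arithmetic runs for every row; the values computed for NULL rows are simply + * never used.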
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] value; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = vector[i] value; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = vector[i] value; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } #IF CHECKED diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt index d5aef78..9d0365e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt @@ -15,13 +15,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -55,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,68 +71,88 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, vector[0], value, outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - - if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. 
- DecimalUtil.Checked(0, vector[0], value, outputColVector); - } else if (inputColVector.noNulls) { + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, vector[i], value, outputColVector); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, vector[i], value, outputColVector); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result. 
- */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt index fd31672..14a0993 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,31 +66,23 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. - */ - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputVector[0] = vector1[0] vector2[0] ? 1 : 0; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { @@ -120,9 +118,9 @@ public class extends VectorExpression { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt index 51e6994..79b1bbd 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; @@ -53,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,63 +68,86 @@ public class extends VectorExpression { inputColVector = () batch.cols[colNum]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector[0] value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] value ? 1 : 0; - } + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] value ? 1 : 0; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = vector[i] value ? 1 : 0; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = vector[i] value ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + * NOTE: We can't avoid conditional statements for LONG/DOUBLE because of NULL + * comparison requirements. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector[i] value ? 
1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector[i] value ? 1 : 0; - } + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt index dd5330d..1e20dba 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,22 +67,14 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
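+ * + * For example, with hypothetical inputs [6, NULL, 9] and [2, 3, NULL], the output + * isNull becomes [false, true, true] and noNulls becomes false: a NULL in either + * input makes the corresponding output row NULL.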
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt index 2be16cc..c8a2665 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,34 +67,15 @@ public class extends VectorExpression { DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1]; DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; int[] sel = batch.selected; - int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - if (inputColVector1.noNulls && inputColVector2.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, - inputColVector1.isRepeating && inputColVector2.isRepeating, - batch.selectedInUse, sel, n); - } - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); @@ -103,10 +90,12 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, vector1[0], vector2[i], outputColVector); } } else { for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, vector1[0], vector2[i], outputColVector); } } @@ -114,10 +103,12 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, vector1[i], vector2[0], outputColVector); } } else { for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, vector1[i], vector2[0], outputColVector); } } @@ -125,10 +116,12 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. 
DecimalUtil.Checked(i, vector1[i], vector2[i], outputColVector); } } else { for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, vector1[i], vector2[i], outputColVector); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt index 159a61e..74fd0e2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; @@ -53,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,50 +70,83 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; if (value == 0) { // Denominator is zero, convert the batch to nulls outputColVector.noNulls = false; outputColVector.isRepeating = true; outputIsNull[0] = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; } else if (inputColVector.isRepeating) { - outputVector[0] = vector[0] value; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] value; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] value; - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] value; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = vector[i] value; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = vector[i] value; } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... 
+ + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] value; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = vector[i] value; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = vector[i] value; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt index 2631468..15d21d0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt @@ -18,10 +18,11 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -55,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,28 +71,12 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + HiveDecimalWritable[] vector = inputColVector.vector; HiveDecimalWritable[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; if (value.compareTo(HiveDecimal.ZERO) == 0) { @@ -93,45 +84,84 @@ public class extends VectorExpression { outputColVector.noNulls = false; outputColVector.isRepeating = true; outputIsNull[0] = true; + return; } else if (inputColVector.isRepeating) { - DecimalUtil.Checked(0, vector[0], value, outputColVector); + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, vector[0], value, outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } - // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, vector[i], value, outputColVector); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, vector[i], value, outputColVector); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - - // copy isNull entry first because operation may overwrite it - outputIsNull[i] = inputIsNull[i]; - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - // copy isNull entries first because operation may overwrite them - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result. - */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt index 811f6db..cead258 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -44,6 +46,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -53,49 +61,77 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = ( vector[0]); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = ( vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ( vector[i]); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = ( vector[i]); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = ( vector[i]); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = ( vector[i]); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
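+ * Instead, as in the arithmetic templates, noNulls is cleared once, isNull is + * copied over wholesale, and the function is applied to every row.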
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { + for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = ( vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = ( vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ( vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt index b200ef9..53e4bb4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt @@ -15,10 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -48,6 +50,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -57,49 +65,83 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = - vector[0]; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = - vector[0]; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = -vector[i]; - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = -vector[i]; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = -vector[i]; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = -vector[i]; } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = -vector[i]; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = -vector[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = -vector[i]; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; + } #IF CHECKED diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt index 027e6ed..0d3ee2b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt @@ -21,9 +21,11 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import java.sql.Date; +import org.apache.hadoop.hive.serde2.io.DateWritable; /** - * Generated from template DTIColumnCompareScalar.txt, which covers comparison + * Generated from template DTIColumnCompareScalar.txt, which covers comparison * expressions between a datetime/interval column and a scalar of the same type. The boolean output * is stored in a separate boolean column. */ @@ -40,6 +42,11 @@ public class extends { } @Override + public String vectorExpressionParameters() { + + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt index ebc0d8a..be5f641 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt @@ -15,12 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.io.LongWritable; +import java.sql.Date; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -45,6 +44,11 @@ public class extends { } @Override + public String vectorExpressionParameters() { + + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt index 5db9a0b..bf1128a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt @@ -61,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -75,22 +81,14 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
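+ * (This single call replaces the hand-computed isRepeating expression that the + * removed lines above evaluated inline.)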
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt index bf4b24c..172bd39 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -61,6 +63,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,34 +82,60 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.( + scratchDate1, value, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.( - scratchDate1, value, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
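+ // A sketch of that alternative (not implemented here): + // Arrays.fill(outputIsNull, false); + // outputColVector.noNulls = true; + // ...after which the tighter ELSE loop needs no per-row outputIsNull store.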
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.( @@ -109,24 +143,39 @@ public class extends VectorExpression { outputVector[i] = DateWritable.dateToDays(outputDate); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt index 847ebac..a515319 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -60,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,20 +81,12 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; - long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || 
inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + long[] vector1 = inputColVector1.vector; - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt index 180bebc..0bae7da 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -61,6 +62,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,33 +81,58 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector1.isRepeating) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
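/*
 * EDITOR'S NOTE (illustrative sketch, not part of the patch): every rewritten evaluate()
 * now leads with an isRepeating fast path that computes entry 0 once and returns. The
 * shape of that path, assuming LongColumnVector from
 * org.apache.hadoop.hive.ql.exec.vector and a stand-in '+' for the generated operator:
 */
static void evaluateRepeating(LongColumnVector in, LongColumnVector out, long scalar) {
  if (in.noNulls || !in.isNull[0]) {
    out.isNull[0] = false;
    out.vector[0] = in.vector[0] + scalar;
  } else {
    out.isNull[0] = true;
    out.noNulls = false;   // a NULL now exists, so the flag must be cleared
  }
  out.isRepeating = true;  // only entry 0 is meaningful for the whole batch
}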
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.( @@ -108,24 +140,39 @@ public class extends VectorExpression { outputColVector.setFromScratch(i); } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt index 4f12315..42046e0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import java.sql.Date; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -75,6 +77,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,34 +96,59 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = 
outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth2.set((int) vector2[0]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector2.isRepeating) { - scratchIntervalYearMonth2.set((int) vector2[0]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { + if (inputColVector2.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { scratchIntervalYearMonth2.set((int) vector2[i]); dtm.( @@ -123,24 +156,39 @@ public class extends VectorExpression { outputVector[i] = DateWritable.dateToDays(outputDate); } } - } else { /* there are nulls */ + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt index a6fa2ac..191eaa6 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -75,6 +76,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,52 +95,91 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + value, inputColVector2.asScratch(0), outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratch(0), outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
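/*
 * EDITOR'S NOTE (illustrative sketch, not part of the patch): the deleted
 * System.arraycopy(inputIsNull, 0, outputIsNull, 0, n) copied null flags wholesale but
 * left outputColVector.noNulls unmaintained. The replacement loops track the flag row by
 * row, as in this stand-alone sketch (names hypothetical, '+' stands for the operator):
 */
static boolean copyNullsPerRow(boolean[] inIsNull, long[] in,
    boolean[] outIsNull, long[] out, int n, long scalar) {
  boolean noNulls = true;
  for (int i = 0; i != n; i++) {
    if (!inIsNull[i]) {
      outIsNull[i] = false;
      out[i] = in[i] + scalar;
    } else {
      outIsNull[i] = true;
      noNulls = false;   // record that the output now contains a NULL
    }
  }
  return noNulls;        // false tells the caller to clear outputColVector.noNulls
}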
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { + if (inputColVector2.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { dtm.( value, inputColVector2.asScratch(i), outputColVector.getScratch()); outputColVector.setFromScratch(i); } } - } else { /* there are nulls */ + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt index 30b03ba..5b643bb 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,39 +68,18 @@ public class extends VectorExpression { Decimal64ColumnVector inputColVector2 = (Decimal64ColumnVector) batch.cols[colNum2]; Decimal64ColumnVector outputColVector = (Decimal64ColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; boolean[] outputIsNull = outputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long 
outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - if (inputColVector1.noNulls && inputColVector2.noNulls) { - - /* - * Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, - inputColVector1.isRepeating && inputColVector2.isRepeating, - batch.selectedInUse, sel, n); - } - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt index 81dcf33..fa091d4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt @@ -18,10 +18,11 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.Decimal64Util; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -55,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,55 +71,71 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - if (inputColVector.noNulls) { - - /* - * Initialize output vector NULL values to false. 
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - // The following may override a "false" null setting if an error or overflow occurs. - final long result = vector[0] value; - outputVector[0] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[0] value; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } else { + outputVector[0] = result; + } + } else { outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else if (inputColVector.noNulls) { + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } } - } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { final long result = vector[i] value; outputVector[i] = result; @@ -122,37 +145,48 @@ public class extends VectorExpression { } } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
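/*
 * EDITOR'S NOTE (illustrative sketch, not part of the patch): decimal64 arithmetic can
 * overflow the declared output precision, so a row can become NULL even when its input was
 * not. That is why these templates range-check every result. Sketch, assuming
 * HiveDecimalWritable.getDecimal64AbsMax(int) from hive-storage-api and '+' for the
 * generated operator:
 */
static void addAndCheck(long left, long right, int outputPrecision,
    long[] outVector, boolean[] outIsNull, int i) {
  final long absMax = HiveDecimalWritable.getDecimal64AbsMax(outputPrecision);
  final long result = left + right;
  outVector[i] = result;
  // On overflow, mark the row NULL; the caller also clears outputColVector.noNulls.
  outIsNull[i] = Math.abs(result) > absMax;
}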
+ final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } } - - // Currently, we defer division, etc to regular HiveDecimal so we don't do any null - // default value setting here. } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt index dc6ccb9..db813b7 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt @@ -18,10 +18,11 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.Decimal64Util; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -56,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,95 +72,120 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. 
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[0]; - outputVector[0] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = value vector[0]; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } else { + outputVector[0] = result; + } + } else { outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else if (inputColVector.noNulls) { + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { final long result = value vector[i]; outputVector[i] = result; if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; outputIsNull[i] = true; + outputColVector.noNulls = false; } } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
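/*
 * EDITOR'S NOTE (illustrative sketch, not part of the patch): in the unselected branches
 * the templates pay a one-time Arrays.fill over the whole isNull array so that noNulls can
 * be restored to true and the tight loop can run with no per-row isNull stores. Assuming
 * java.util.Arrays and Hive's LongColumnVector:
 */
static void restoreNoNulls(LongColumnVector out) {
  if (!out.noNulls) {
    // Clear the entire array, not just the first n entries, so any index can be trusted.
    Arrays.fill(out.isNull, false);
    out.noNulls = true;
  }
}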
+ final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } } - - // Currently, we defer division, etc to regular HiveDecimal so we don't do any null - // default value setting here. } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt index 1ab5228..5e684b4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -27,7 +29,6 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; -import java.util.Arrays; public class extends VectorExpression { private static final long serialVersionUID = 1L; @@ -49,6 +50,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -58,54 +65,86 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + (0, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. 
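/*
 * EDITOR'S NOTE (illustrative, not part of the patch): in DecimalColumnUnaryFunc the
 * generated helper call reads "(i, vector[i], outputColVector)" above because its template
 * placeholder was lost in this rendering; it expands to a DecimalUtil function that may
 * itself null the row (e.g. on overflow), which is why outputIsNull[i] is optimistically
 * set to false first. Hypothetical shape of one generated row:
 */
static void applyUnaryRow(DecimalColumnVector out, HiveDecimalWritable[] vector, int i) {
  out.isNull[i] = false;  // optimistic: assume the function succeeds
  // The generated DecimalUtil call writes out.vector[i] and, on failure, is expected to
  // set out.isNull[i] = true and out.noNulls = false, overriding the line above.
}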
- outputIsNull[0] = inputIsNull[0]; - (0, vector[0], outputColVector); - outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - // Set isNull because decimal operation can yield a null. - outputIsNull[i] = false; - (i, vector[i], outputColVector); - } + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } + } } else { + if (!outputColVector.noNulls) { - // Set isNull because decimal operation can yield a null. - Arrays.fill(outputIsNull, 0, n, false); + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. (i, vector[i], outputColVector); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - (i, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - (i, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } - outputColVector.isRepeating = false; } } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt index 36ad892..55eb50e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt @@ -57,28 +57,27 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. 
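/*
 * EDITOR'S NOTE (illustrative sketch, not part of the patch): for filter expressions a
 * repeating input is decided by a single test: either every row survives and the batch is
 * left untouched, or the whole batch is discarded. Sketch for the BETWEEN case, assuming
 * Hive's VectorizedRowBatch (other names hypothetical):
 */
static void filterRepeatingBetween(long[] vector, boolean[] isNull, boolean noNulls,
    long leftValue, long rightValue, VectorizedRowBatch batch) {
  final boolean rowIsNull = !noNulls && isNull[0];
  if (rowIsNull || vector[0] < leftValue || vector[0] > rightValue) {
    batch.size = 0;  // entire batch is filtered out
  }
  // Otherwise batch.size and batch.selected are left exactly as they were.
}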
+ if ((vector[0] < leftValue || vector[0] > rightValue)) { - + // Entire batch is filtered out. batch.size = 0; } @@ -105,12 +104,9 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((vector[0] < leftValue || vector[0] > rightValue)) { - + // Entire batch is filtered out. batch.size = 0; } @@ -121,7 +117,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((leftValue <= vector[i] && vector[i] <= rightValue)) { sel[newSize++] = i; } @@ -132,7 +128,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((leftValue <= vector[i] && vector[i] <= rightValue)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt index 150d341..e458992 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + // filter rows with NULL on left input int newSize; newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n); @@ -85,12 +86,9 @@ public class extends VectorExpression { n = batch.size = newSize; batch.selectedInUse = true; } - + // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. 
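/*
 * EDITOR'S NOTE (illustrative sketch, not part of the patch): the column-column filters
 * first drop NULL rows via NullUtil.filterNulls and only then run the comparison loops on
 * guaranteed-non-NULL data. An approximation of what such a pre-pass does (this is not
 * NullUtil's actual code, and it omits the repeating case):
 */
static int filterNullsSketch(boolean[] isNull, boolean noNulls,
    boolean selectedInUse, int[] sel, int n) {
  if (noNulls) {
    return n;  // nothing to drop
  }
  int newSize = 0;
  for (int j = 0; j != n; j++) {
    final int i = selectedInUse ? sel[j] : j;
    if (!isNull[i]) {
      sel[newSize++] = i;  // compact surviving row indexes into the selection vector
    }
  }
  return newSize;  // caller updates batch.size and sets batch.selectedInUse = true
}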
if (!(vector1[0] vector2[0])) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt index a9ddeca..c955c06 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt @@ -51,25 +51,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(vector[0] value)) { //Entire batch is filtered out. batch.size = 0; @@ -97,9 +96,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(vector[0] value)) { //Entire batch is filtered out. batch.size = 0; @@ -111,7 +108,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (vector[i] value) { sel[newSize++] = i; } @@ -122,7 +119,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (vector[i] value) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt index 7c41f3e..f42668c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt @@ -60,26 +60,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. if ((DecimalUtil.compare(vector[0], leftValue) < 0 || DecimalUtil.compare(vector[0], rightValue) > 0)) { // Entire batch is filtered out. @@ -108,10 +106,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. 
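/*
 * EDITOR'S NOTE (illustrative sketch, not part of the patch): when no selection vector is
 * in use yet, a filter builds one and switches selectedInUse on only if a row was actually
 * dropped. Sketch of that bookkeeping for the BETWEEN case, with a long-typed stand-in for
 * the HiveDecimalWritable comparisons:
 */
static void filterUnselected(VectorizedRowBatch batch, boolean[] isNull,
    long[] vector, long leftValue, long rightValue) {
  final int n = batch.size;
  final int[] sel = batch.selected;
  int newSize = 0;
  for (int i = 0; i != n; i++) {
    if (!isNull[i] && leftValue <= vector[i] && vector[i] <= rightValue) {
      sel[newSize++] = i;
    }
  }
  if (newSize < n) {
    batch.size = newSize;         // rows were dropped: the selection vector now governs
    batch.selectedInUse = true;
  }
}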
- if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((DecimalUtil.compare(vector[0], leftValue) < 0 || DecimalUtil.compare(vector[0], rightValue) > 0)) { // Entire batch is filtered out. @@ -124,19 +119,19 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((DecimalUtil.compare(leftValue, vector[i]) <= 0 && DecimalUtil.compare(vector[i], rightValue) <= 0)) { sel[newSize++] = i; } } } - + // Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((DecimalUtil.compare(leftValue, vector[i]) <= 0 && DecimalUtil.compare(vector[i], rightValue) <= 0)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt index 6a82183..77fe7ae 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt index 80a19d9..078b132 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt @@ -53,24 +53,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(DecimalUtil.compare(vector[0], value) 0)) { // Entire batch is filtered out. @@ -99,9 +99,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. 
Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(DecimalUtil.compare(vector[0], value) 0)) { // Entire batch is filtered out. @@ -114,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(vector[i], value) 0) { sel[newSize++] = i; } @@ -126,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(vector[i], value) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt index 4b7e849..20dbaba 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt @@ -53,24 +53,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(DecimalUtil.compare(value, vector[0]) 0)) { // Entire batch is filtered out. @@ -99,9 +99,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(DecimalUtil.compare(value, vector[0]) 0)) { // Entire batch is filtered out. 
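/*
 * EDITOR'S NOTE: several conditions above read like "DecimalUtil.compare(value, vector[0]) 0"
 * because the template's comparison-operator placeholder was lost in this rendering; in the
 * generated sources the gap is the operator itself. One hypothetical instantiation (a
 * scalar-greater-than-column filter) would read:
 */
static boolean scalarGreaterThanCell(HiveDecimal value, HiveDecimalWritable cell) {
  return DecimalUtil.compare(value, cell) > 0;  // '>' fills the elided placeholder here
}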
@@ -114,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(value, vector[i]) 0) { sel[newSize++] = i; } @@ -126,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(value, vector[i]) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt index f741409..4afed54 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt @@ -56,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,13 +71,8 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - [] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + [] vector1 = inputColVector1.vector; // filter rows with NULL on left input int newSize; @@ -90,9 +91,6 @@ public class extends VectorExpression { // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. if (!(vector1[0] inputColVector2.(0))) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt index 8ece14f..8f8104d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt @@ -57,24 +57,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(value inputColVector.(0))) { //Entire batch is filtered out. batch.size = 0; @@ -102,9 +100,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(value inputColVector.(0))) { //Entire batch is filtered out. 
batch.size = 0; @@ -116,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value inputColVector.(i)) { sel[newSize++] = i; } @@ -127,7 +123,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value inputColVector.(i)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt index 18840f1..28b5704 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt @@ -52,25 +52,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(value vector[0])) { //Entire batch is filtered out. batch.size = 0; @@ -98,9 +97,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(value vector[0])) { //Entire batch is filtered out. batch.size = 0; @@ -112,7 +109,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value vector[i]) { sel[newSize++] = i; } @@ -123,7 +120,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value vector[i]) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt index b9a332a..b7f70e1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt @@ -56,27 +56,26 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. 
Repeating property will not change. if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -108,9 +107,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -124,20 +121,20 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt index 0f0cb2e..2d18d1d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,36 +67,31 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + byte[][] vector1 = inputColVector1.vector; byte[][] vector2 = inputColVector2.vector; int[] start1 = inputColVector1.start; int[] start2 = inputColVector2.start; int[] length1 = inputColVector1.length; int[] length2 = inputColVector2.length; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - + /* Either all must remain selected or all will be eliminated. * Repeating property will not change. 
*/ - if (!((vector1[0], start1[0], length1[0], + if (!((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]))) { batch.size = 0; - } + } } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -99,7 +100,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -114,7 +115,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -123,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -137,7 +138,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -146,7 +147,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -173,7 +174,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -184,7 +185,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -206,7 +207,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -215,7 +216,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -231,7 +232,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -242,7 +243,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -254,16 +255,16 @@ public class extends VectorExpression { } } } - + // handle case where only input 1 has nulls } else if (inputColVector2.noNulls) { if (inputColVector1.isRepeating && 
inputColVector2.isRepeating) { if (nullPos1[0] || - !((vector1[0], start1[0], length1[0], + !((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]))) { - batch.size = 0; + batch.size = 0; return; - } + } } else if (inputColVector1.isRepeating) { if (nullPos1[0]) { @@ -275,7 +276,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -284,7 +285,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -300,7 +301,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -311,7 +312,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -328,7 +329,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -339,7 +340,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -349,17 +350,17 @@ public class extends VectorExpression { batch.size = newSize; batch.selectedInUse = true; } - } + } } - + // handle case where both inputs have nulls } else { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { if (nullPos1[0] || nullPos2[0] || - !((vector1[0], start1[0], length1[0], + !((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]))) { - batch.size = 0; - } + batch.size = 0; + } } else if (inputColVector1.isRepeating) { if (nullPos1[0]) { batch.size = 0; @@ -370,7 +371,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -381,7 +382,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -402,7 +403,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -413,7 +414,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -430,7 +431,7 @@ public class extends VectorExpression { 
for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i] && !nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -441,7 +442,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i] && !nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt index a85a889..76ec8a0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt @@ -52,27 +52,26 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!((vector[0], start[0], length[0], value, 0, value.length))) { //Entire batch is filtered out. @@ -101,9 +100,7 @@ public abstract class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!((vector[0], start[0], length[0], value, 0, value.length))) { //Entire batch is filtered out. 
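All of these Filter* templates implement the same contract: evaluate() narrows the batch in place by rewriting batch.selected and batch.size, and a NULL input row never satisfies a filter predicate. The hand-written sketch below restates that contract for a long column and a literal greater-than comparison. It is illustrative only: the class name is invented, and the real generated templates split the noNulls/isRepeating cases into dedicated branches rather than testing per row, precisely so the inner loops stay branch-light.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Illustrative sketch (invented class name): a filter that keeps rows where
// col > value; NULL rows never satisfy a filter predicate.
public class SketchFilterLongColGreaterLongScalar {

  public static void filter(VectorizedRowBatch batch, int colNum, long value) {
    final int n = batch.size;
    if (n == 0) {
      return;                       // nothing to do for an empty batch
    }
    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
    long[] vector = inputColVector.vector;
    boolean[] inputIsNull = inputColVector.isNull;
    int[] sel = batch.selected;

    if (inputColVector.isRepeating) {
      // One logical value covers the whole batch: keep every row or none.
      boolean keep =
          (inputColVector.noNulls || !inputIsNull[0]) && vector[0] > value;
      if (!keep) {
        batch.size = 0;             // entire batch is filtered out
      }
      return;
    }

    int newSize = 0;
    if (batch.selectedInUse) {
      // Compact the existing selection in place; this is safe because
      // newSize can never run ahead of j.
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        if ((inputColVector.noNulls || !inputIsNull[i]) && vector[i] > value) {
          sel[newSize++] = i;
        }
      }
    } else {
      // First filter in the chain: establish the selection vector.
      for (int i = 0; i != n; i++) {
        if ((inputColVector.noNulls || !inputIsNull[i]) && vector[i] > value) {
          sel[newSize++] = i;
        }
      }
      batch.selectedInUse = true;
    }
    batch.size = newSize;
  }
}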
@@ -116,19 +113,19 @@ public abstract class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt index f3d1e58..91d8da5c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt @@ -53,27 +53,26 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!((value, 0, value.length, vector[0], start[0], length[0]))) { //Entire batch is filtered out. @@ -102,9 +101,7 @@ public abstract class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!((value, 0, value.length, vector[0], start[0], length[0]))) { //Entire batch is filtered out. 
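The scalar-compare-column base template mirrors the column-compare-scalar one above with the operand order flipped: the scalar's bytes become the left argument of the comparison, as in (value, 0, value.length, vector[i], start[i], length[i]). A minimal, self-contained illustration of why the order matters; the class name is invented, while StringExpr.compare is the real byte-wise helper these templates build on.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;

// Sketch of the operand order (invented class name).
public class SketchScalarVsColumnCompare {
  public static void main(String[] args) {
    byte[] value = "mm".getBytes(StandardCharsets.UTF_8);  // scalar operand
    byte[] row   = "zz".getBytes(StandardCharsets.UTF_8);  // one column entry

    // value < row  <=>  compare(value, ..., row, ...) < 0
    System.out.println(
        StringExpr.compare(value, 0, value.length, row, 0, row.length) < 0);  // true

    // row < value  <=>  compare(row, ..., value, ...) < 0
    System.out.println(
        StringExpr.compare(row, 0, row.length, value, 0, value.length) < 0);  // false
  }
}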
@@ -117,19 +114,19 @@ public abstract class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt index 53bf271..604060a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt @@ -59,25 +59,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { // Entire batch is filtered out. @@ -106,10 +103,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { // Entire batch is filtered out. 
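FilterTimestampColumnBetween keeps row i when leftValue <= t[i] <= rightValue, and the repeating branches apply the negated test once to entry 0 to keep or drop the whole batch. A compact restatement of the two forms, using the same TimestampColumnVector.compareTo overloads the template calls; the wrapper class and method names are invented.

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

// Illustrative helper, not generated code.
public class SketchTimestampBetween {

  // Row i is kept when leftValue <= t[i] <= rightValue.
  static boolean keep(TimestampColumnVector col, int i,
      Timestamp leftValue, Timestamp rightValue) {
    return col.compareTo(leftValue, i) <= 0 && col.compareTo(i, rightValue) <= 0;
  }

  // Equivalent negation used by the repeating fast path: drop everything
  // when t[0] < leftValue or t[0] > rightValue.
  static boolean dropAll(TimestampColumnVector col,
      Timestamp leftValue, Timestamp rightValue) {
    return col.compareTo(0, leftValue) < 0 || col.compareTo(0, rightValue) > 0;
  }
}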
@@ -122,7 +116,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { sel[newSize++] = i; } @@ -134,7 +128,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt index eaa58c7..f9bc9ee 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,13 +68,8 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - [] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + [] vector2 = inputColVector2.vector; // filter rows with NULL on left input int newSize; @@ -87,9 +88,6 @@ public class extends VectorExpression { // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. if (!(inputColVector1.(0) vector2[0])) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt index 2e38269..fc1be95 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt @@ -53,24 +53,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(inputColVector.(0) value)) { //Entire batch is filtered out. batch.size = 0; @@ -98,9 +96,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
- if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector.(0) value)) { //Entire batch is filtered out. batch.size = 0; @@ -112,7 +108,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector.(i) value) { sel[newSize++] = i; } @@ -123,7 +119,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector.(i) value) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt index 697e3ef..0a541f9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt @@ -57,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -70,12 +76,6 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt index 435316d..68e0006 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt @@ -56,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,18 +70,10 @@ public class extends VectorExpression { inputColVector1 = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector1.isNull; if (inputColVector1.noNulls) { if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(inputColVector1.compareTo(0, value) 0)) { //Entire batch is filtered out. batch.size = 0; @@ -103,9 +101,7 @@ public class extends VectorExpression { } } else { if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector1.compareTo(0, value) 0)) { //Entire batch is filtered out. 
batch.size = 0; @@ -117,7 +113,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector1.compareTo(i, value) 0) { sel[newSize++] = i; } @@ -128,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector1.compareTo(i, value) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt index 4887ad2..d5952de 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt @@ -56,6 +56,13 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -63,18 +70,10 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector2.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector2.isNull; if (inputColVector2.noNulls) { if (inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(inputColVector2.compareTo(value, 0) 0)) { // Entire batch is filtered out. @@ -103,9 +102,7 @@ public class extends VectorExpression { } } else { if (inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector2.compareTo(value, 0) 0)) { // Entire batch is filtered out. 
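One rule all of these repeating branches share: consult isNull[0] before reading any data, because a column vector's entries are undefined wherever isNull is set, and a repeating NULL can never pass a filter. Sketched on a long column for brevity; the class and method names are invented.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Sketch (invented class name): repeating-input handling for a filter.
// vector[0] is read only after the null check, since data entries are
// undefined at positions where isNull is true.
public class SketchRepeatingFilterCase {
  public static void filterRepeating(VectorizedRowBatch batch,
      LongColumnVector col, long value) {
    if (!col.noNulls && col.isNull[0]) {
      batch.size = 0;   // a repeating NULL fails every filter predicate
      return;
    }
    if (!(col.vector[0] > value)) {
      batch.size = 0;   // entire batch is filtered out
    }
    // Otherwise every currently selected row stays selected; neither
    // batch.selected nor batch.selectedInUse needs to change.
  }
}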
@@ -118,7 +115,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector2.compareTo(value, i) 0) { sel[newSize++] = i; } @@ -130,7 +127,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector2.compareTo(value, i) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt index cc86a3e..44e8e18 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt @@ -58,27 +58,26 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -110,9 +109,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. 
- if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -126,20 +123,20 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt index 52f1d9e..cf9afa0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt @@ -22,7 +22,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -59,6 +58,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -68,20 +73,16 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] vector2 = arg2ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { outputColVector.fill(arg3Scalar); @@ -93,22 +94,26 @@ public class extends VectorExpression { // reduce the number of code paths needed below. arg2ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : false); + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); } } else { for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? 
vector2[i] : arg3Scalar); - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : false); + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt index 1693e8f..0eb42d1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt @@ -22,7 +22,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -59,6 +58,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -68,20 +73,16 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] vector3 = arg3ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -95,18 +96,25 @@ public class extends VectorExpression { // for when arg3ColVector is repeating or has no nulls. arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); } } else { for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? 
false : arg3ColVector.isNull[i]); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt index ebdfe47..cec1231 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,48 +73,90 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { outputColVector.fill(arg3Scalar); } - } else if (arg1ColVector.noNulls) { + return; + } + + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * Since we always generate a result without NULLs, we can optimize this case similar to + * the optimization above... + */ + if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? - arg2Scalar : arg3Scalar); - outputIsNull[i] = false; - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2Scalar : arg3Scalar); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? - arg2Scalar : arg3Scalar); + arg2Scalar : arg3Scalar); } - Arrays.fill(outputIsNull, 0, n, false); } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt index 9767973..7c5b614 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt @@ -61,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -75,22 +81,14 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt index ca5829c..84d6c4a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -61,6 +63,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,34 +82,60 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth1.set((int) vector1[0]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchIntervalYearMonth1.set((int) vector1[0]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { scratchIntervalYearMonth1.set((int) vector1[i]); dtm.( @@ -109,24 +143,39 @@ public class extends VectorExpression { outputVector[i] = DateWritable.dateToDays(outputDate); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt index d6e45ac..cfe44c1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,21 +79,12 @@ public class extends VectorExpression { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt index 6e232e7..22f7abf 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -60,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,34 +80,59 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth1.set((int) vector1[0]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchIntervalYearMonth1.set((int) vector1[0]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { scratchIntervalYearMonth1.set((int) vector1[i]); dtm.( @@ -108,24 +140,39 @@ public class extends VectorExpression { outputColVector.setFromScratchTimestamp(i); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt index 041a651..ffc2cec 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -75,6 +77,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,34 +96,60 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = 
outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.( + value, scratchDate2, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.( - value, scratchDate2, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { + if (inputColVector2.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.( @@ -123,24 +157,39 @@ public class extends VectorExpression { outputVector[i] = DateWritable.dateToDays(outputDate); } } - } else { /* there are nulls */ + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt index f2d4eaf..157e95e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -72,6 +73,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -85,52 +92,93 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(0), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratchTimestamp(0), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { + if (inputColVector2.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { dtm.( value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); outputColVector.setFromScratchTimestamp(i); } } - } else { /* there are nulls */ + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt index bd2cbac..e3f36b9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,21 +68,13 @@ public class extends VectorExpression { TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1
= inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt index 889c445..2957c73 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -54,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,62 +68,86 @@ public class extends VectorExpression { inputColVector1 = () batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + [] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector1[0] value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector1.noNulls; if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector1[0] value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] value ? 1 : 0; - } + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector1[i] value ? 1 : 0; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = vector1[i] value ? 
1 : 0; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = vector1[i] value ? 1 : 0; } } - } else { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector1[0] value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + * NOTE: We can't avoid conditional statements for LONG/DOUBLE because of NULL + * comparison requirements. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector1[i] value ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector1[i] value ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt index 4d79283..bb7f57d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -54,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,61 +69,85 @@ public class extends VectorExpression { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; - long[] outputVector = outputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + long[] outputVector = outputColVector.vector; + // We do not need to do a column reset since we are carefully changing the output. 
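+    // NOTE: Skipping the reset is safe because every path below either writes
+    // outputIsNull[i] explicitly or relies on outputColVector.noNulls == true,
+    // which guarantees the isNull entries are already all false. A debug-only
+    // sanity check could look like this (allFalse is a hypothetical helper):
+    //
+    //   assert !outputColVector.noNulls || allFalse(outputIsNull, n);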
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = value inputColVector.(0) ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value inputColVector.(0) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value inputColVector.(i) ? 1 : 0; - } + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = + value inputColVector.(i) ? 1 : 0; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = + value inputColVector.(i) ? 1 : 0; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - outputVector[i] = value inputColVector.(i) ? 1 : 0; + outputVector[i] = + value inputColVector.(i) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value inputColVector.(0) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value inputColVector.(i) ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value inputColVector.(i) ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt index 67106c2..695a063 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -66,6 +68,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -75,45 +83,82 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } if (inputColVector.isRepeating) { - outputVector[0] = value vector[0]; - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = value vector[0]; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value vector[i]; - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value vector[i]; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = value vector[i]; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = value vector[i]; } } - } else { /* there are nulls */ + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
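+       * Instead, noNulls is cleared once, each row's isNull entry is copied from
+       * the input (wholesale via System.arraycopy when no selection is in use),
+       * and a value is computed for every row; rows marked NULL may hold garbage
+       * values, which consumers must never read.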
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = value vector[i]; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = value vector[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = value vector[i]; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } #IF CHECKED diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt index 3ffca6c..a0dbea6 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,60 +72,86 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, value, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullDataEntriesDecimal(outputColVector, batch.selectedInUse, sel, n); return; } - - if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(0, value, vector[0], outputColVector); - } else if (inputColVector.noNulls) { + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
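+          // With a sparse selection the per-row clearing below touches only the
+          // selected entries, so noNulls cannot safely be flipped back to true
+          // here; bulk-filling isNull would allow that, but the crossover point
+          // where it wins would need benchmarking.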
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, value, vector[i], outputColVector); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt index 9f4ec50..06cd7e6 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -53,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,63 +69,84 @@ public class extends VectorExpression { inputColVector = () batch.cols[colNum]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = value vector[0] ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value vector[i] ? 1 : 0; - } + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value vector[i] ? 1 : 0; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = value vector[i] ? 1 : 0; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = value vector[i] ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value vector[i] ? 
1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = value vector[i] ? 1 : 0; - } + outputIsNull[i] = false; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt index aa33354..a5de652 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -65,6 +67,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,56 +82,80 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - boolean hasDivBy0 = false; if (inputColVector.isRepeating) { - denom = vector[0]; - outputVector[0] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + denom = vector[0]; + outputVector[0] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - denom = vector[i]; - outputVector[i] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
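+          // The fill deliberately covers the entire isNull array, not just the
+          // first n entries: noNulls == true promises there are no true entries
+          // anywhere in isNull, so a partial fill would not justify the reset.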
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { denom = vector[i]; outputVector[i] = value denom; hasDivBy0 = hasDivBy0 || (denom == 0); } } - } else { /* there are nulls */ + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; denom = vector[i]; outputVector[i] = value denom; hasDivBy0 = hasDivBy0 || (denom == 0); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { denom = vector[i]; outputVector[i] = value denom; hasDivBy0 = hasDivBy0 || (denom == 0); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt index 650101c..b4ec35c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,67 +72,89 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; HiveDecimalWritable[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, value, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } if (inputColVector.noNulls) { + if (batch.selectedInUse) { - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - - if (inputColVector.isRepeating) { - DecimalUtil.Checked(0, value, vector[0], outputColVector); + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
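+          // Even this no-NULLs path can end with NULLs: the checked decimal
+          // operation nulls out a row on overflow or divide-by-zero, which is
+          // why each row's isNull entry is cleared before the operation runs.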
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } + } else { + if (!outputColVector.noNulls) { - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; } - } else { for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, value, vector[i], outputColVector); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - - // copy isNull entry first because the operation may overwrite it - outputIsNull[i] = inputIsNull[i]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - - // copy isNull entries first because the operation may overwrite them - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result anyway. - */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt index 1b1db54..573af7f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -53,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -63,9 +71,8 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - boolean[] outNull = outputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; - int n = batch.size; byte[][] vector1 = inputColVector1.vector; byte[][] vector2 = inputColVector2.vector; int[] start1 = inputColVector1.start; @@ -74,20 +81,17 @@ public class extends VectorExpression { int[] length2 = inputColVector2.length; long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.noNulls = true; + + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; + // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = true; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; - ret = (vector1[0], start1[0], length1[0], + outputIsNull[0] = false; + ret = (vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]); if (ret) { outVector[0] = 1; @@ -98,7 +102,8 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[0], start1[0], length1[0], + outputIsNull[i] = false; + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -106,8 +111,9 @@ public class extends VectorExpression { } } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -119,7 +125,8 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + outputIsNull[i] = false; + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -127,8 +134,9 @@ public class extends VectorExpression { } } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -139,7 +147,8 @@ public class extends VectorExpression { } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + outputIsNull[i] = false; + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -147,8 +156,9 @@ public class extends VectorExpression { } } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], 
length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -156,15 +166,18 @@ public class extends VectorExpression { } } } - + // handle case where only input 2 has nulls } else if (inputColVector1.noNulls) { + + // Carefully handle NULLs... outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; - outNull[0] = nullPos2[0]; + outputIsNull[0] = nullPos2[0]; if (!nullPos2[0]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { @@ -177,21 +190,21 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos2[i]; + outputIsNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { outVector[i] = 0; } - } + } } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos2[i]; + outputIsNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -204,14 +217,14 @@ public class extends VectorExpression { if (nullPos2[0]) { // Entire output vector will be null outputColVector.isRepeating = true; - outNull[0] = true; + outputIsNull[0] = true; return; } if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = false; - if ((vector1[i], start1[i], length1[i], + outputIsNull[i] = false; + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -220,8 +233,8 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - outNull[i] = false; - if ((vector1[i], start1[i], length1[i], + outputIsNull[i] = false; + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -233,21 +246,21 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos2[i]; + outputIsNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { outVector[i] = 0; } - } + } } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos2[i]; + outputIsNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -255,17 +268,20 @@ public class extends VectorExpression { } } } - } + } } - + // handle case where only input 1 has nulls } else if (inputColVector2.noNulls) { + + // Carefully handle NULLs... 
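+      // noNulls is cleared up front (conservatively) rather than only when a
+      // NULL row is actually produced; the repeating all-NULL shortcut below
+      // then only has to mark entry 0 and return.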
outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; - outNull[0] = nullPos1[0]; + outputIsNull[0] = nullPos1[0]; if (!nullPos1[0]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { @@ -276,14 +292,14 @@ public class extends VectorExpression { if (nullPos1[0]) { // Entire output vector will be null outputColVector.isRepeating = true; - outNull[0] = true; + outputIsNull[0] = true; return; } if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = false; - if ((vector1[0], start1[0], length1[0], + outputIsNull[i] = false; + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -292,8 +308,8 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - outNull[i] = false; - if ((vector1[0], start1[0], length1[0], + outputIsNull[i] = false; + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -305,9 +321,9 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos1[i]; + outputIsNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -317,9 +333,9 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i]; + outputIsNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -332,9 +348,9 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos1[i]; + outputIsNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -344,9 +360,9 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i]; + outputIsNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -354,17 +370,20 @@ public class extends VectorExpression { } } } - } + } } - + // handle case where both inputs have nulls } else { + + // Carefully handle NULLs... 
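+      // A comparison row is NULL when either input row is NULL, hence the
+      // outputIsNull[i] = nullPos1[i] || nullPos2[i] pattern below.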
outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; - outNull[0] = nullPos1[0] || nullPos2[0]; - if (!outNull[0]) { - if ((vector1[0], start1[0], length1[0], + outputIsNull[0] = nullPos1[0] || nullPos2[0]; + if (!outputIsNull[0]) { + if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { @@ -374,15 +393,15 @@ public class extends VectorExpression { } else if (inputColVector1.isRepeating) { if (nullPos1[0]) { outputColVector.isRepeating = true; - outNull[0] = true; + outputIsNull[0] = true; return; } if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos2[i]; + outputIsNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -392,9 +411,9 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos2[i]; + outputIsNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -406,15 +425,15 @@ public class extends VectorExpression { } else if (inputColVector2.isRepeating) { if (nullPos2[0]) { outputColVector.isRepeating = true; - outNull[0] = true; + outputIsNull[0] = true; return; } if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos1[i]; + outputIsNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -424,9 +443,9 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i]; + outputIsNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -439,9 +458,9 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos1[i] || nullPos2[i]; - if (!outNull[i]) { - if ((vector1[i], start1[i], length1[i], + outputIsNull[i] = nullPos1[i] || nullPos2[i]; + if (!outputIsNull[i]) { + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -451,9 +470,9 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i] || nullPos2[i]; - if (!outNull[i]) { - if ((vector1[i], start1[i], length1[i], + outputIsNull[i] = nullPos1[i] || nullPos2[i]; + if (!outputIsNull[i]) { + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt index ca55834..cb6cf4e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.nio.charset.StandardCharsets; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -53,31 +54,36 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNull = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = false; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.noNulls) { - outputColVector.noNulls = true; + if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; + outputColVector.isRepeating = true; + outputIsNull[0] = false; if ((vector[0], start[0], length[0], value, 0, value.length)) { outVector[0] = 1; } else { @@ -86,6 +92,7 @@ public abstract class extends VectorExpression { } else if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; if ((vector[i], start[i], length[i], value, 0, value.length)) { outVector[i] = 1; } else { @@ -93,6 +100,7 @@ public abstract class extends VectorExpression { } } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { if ((vector[i], start[i], length[i], value, 0, value.length)) { outVector[i] = 1; @@ -102,11 +110,14 @@ public abstract class extends VectorExpression { } } } else { + + // Carefully handle NULLs... 
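+      // Every branch below writes outputIsNull[i] for each row in range (or
+      // copies the whole prefix via System.arraycopy), so clearing noNulls once
+      // here keeps the flag consistent with isNull through batch.size.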
outputColVector.noNulls = false; + if (inputColVector.isRepeating) { outputColVector.isRepeating = true; - outNull[0] = nullPos[0]; - if (!nullPos[0]) { + outputIsNull[0] = inputIsNull[0]; + if (!inputIsNull[0]) { if ((vector[0], start[0], length[0], value, 0, value.length)) { outVector[0] = 1; } else { @@ -116,8 +127,8 @@ public abstract class extends VectorExpression { } else if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { outVector[i] = 1; } else { @@ -126,9 +137,10 @@ public abstract class extends VectorExpression { } } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - outNull[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { outVector[i] = 1; } else { diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt index ecb4d2a..74d8b48 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.nio.charset.StandardCharsets; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -54,30 +55,35 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNull = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - long[] outVector = outputColVector.vector; + long[] outVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - + // We do not need to do a column reset since we are carefully changing the output. 
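+    // isRepeating is cleared eagerly and only re-established on the repeating
+    // input path, so a stale true value left over from a previous batch cannot
+    // leak into this evaluation.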
outputColVector.isRepeating = false; + if (inputColVector.noNulls) { - outputColVector.noNulls = true; + if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; + outputColVector.isRepeating = true; + outputIsNull[0] = false; if ((value, 0, value.length, vector[0], start[0], length[0])) { outVector[0] = 1; } else { @@ -86,6 +92,7 @@ public abstract class extends VectorExpression { } else if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; if ((value, 0, value.length, vector[i], start[i], length[i])) { outVector[i] = 1; } else { @@ -93,6 +100,7 @@ public abstract class extends VectorExpression { } } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { if ((value, 0, value.length, vector[i], start[i], length[i])) { outVector[i] = 1; @@ -102,11 +110,14 @@ public abstract class extends VectorExpression { } } } else { + + // Carefully handle NULLs... outputColVector.noNulls = false; + if (inputColVector.isRepeating) { outputColVector.isRepeating = true; - outNull[0] = nullPos[0]; - if (!nullPos[0]) { + outputIsNull[0] = inputIsNull[0]; + if (!inputIsNull[0]) { if ((value, 0, value.length, vector[0], start[0], length[0])) { outVector[0] = 1; } else { @@ -116,8 +127,8 @@ public abstract class extends VectorExpression { } else if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { outVector[i] = 1; } else { @@ -126,9 +137,9 @@ public abstract class extends VectorExpression { } } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - outNull[i] = nullPos[i]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { outVector[i] = 1; } else { diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt index a27da10..27d8a3d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt @@ -60,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,21 +80,12 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
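+     * NullUtil.propagateNullsColCol also derives the output's isRepeating and
+     * noNulls flags, which is what replaces the inline isRepeating expression
+     * deleted above.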
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt index 9f708e2..799daf2 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -61,6 +62,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,52 +81,91 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + inputColVector1.asScratch(0), value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratch(0), value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { dtm.( inputColVector1.asScratch(i), value, outputColVector.getScratch()); outputColVector.setFromScratch(i); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
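+       * Unlike the old code, which ran the arithmetic for every row and then
+       * copied isNull afterwards, the loops below skip the computation entirely
+       * for NULL rows and clear noNulls as soon as a NULL row is emitted.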
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt index b3d9a4b..f894bcf 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,21 +79,12 @@ public class extends VectorExpression { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt index e49f614..0e2cd13 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -57,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -70,52 +78,90 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + inputColVector1.asScratchTimestamp(0), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratchTimestamp(0), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
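+          // (Filling the whole array rather than just n entries means no stale
+          // true entry can survive anywhere in isNull, so noNulls = true stays
+          // valid even if a later, larger batch reuses this scratch column.)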
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { dtm.( inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); outputColVector.setFromScratchTimestamp(i); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt index 95e7271..4240994 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt @@ -58,6 +58,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -72,19 +78,10 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt index 6baa72a..bcb8fd1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -59,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -72,52 +79,91 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + inputColVector1.asScratch(0), value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratch(0), value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { dtm.( inputColVector1.asScratch(i), value, outputColVector.getScratch()); outputColVector.setFromScratch(i); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
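+       *
+       * (Note the asymmetry: this branch only ever drives noNulls to false and
+       * never back to true, since rows outside the current selection may still
+       * carry stale NULL marks from an earlier use of the scratch column.)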
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt index 54a1a37..7e65b9b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,21 +66,12 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; [] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
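+       *
+       * (Same shared helper as the arithmetic templates: a comparison is just
+       * another two-input expression whose long 0/1 output obeys the usual
+       * NULL-propagation rule, so nothing compare-specific is needed here.)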
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt index 3bb95dd..b1e92e0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -54,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,61 +70,85 @@ public class extends VectorExpression { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.(0) value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.(0) value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector.(i) value ? 1 : 0; - } + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = + inputColVector.(i) value ? 1 : 0; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = + inputColVector.(i) value ? 1 : 0; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.(i) value ? 
1 : 0; + outputVector[i] = + inputColVector.(i) value ? 1 : 0; } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inputColVector.(0) value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector.(i) value ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector.(i) value ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt index 3db5d01..b81b805 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt @@ -55,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,20 +73,11 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt index 1ee7b11..cee680a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt @@ -18,10 +18,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,61 +73,82 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector1.noNulls; if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; - } + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
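+          // (The trade-off, as we read it: one up-front fill of the array buys
+          // the tighter loop with no per-row isNull writes for every later
+          // all-not-null batch that reuses this scratch column.)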
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; } } - } else { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt index 509f264..b50cbc8 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -74,6 +75,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -87,34 +94,59 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
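+      // (A full reset() would zero the entire isNull array and clear the flags
+      // wholesale; instead, every path below writes exactly the isNull entries
+      // and flags it needs, avoiding work for rows this batch never produces.)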
+    outputColVector.isRepeating = false;

     long[] vector2 = inputColVector2.vector;

-    // return immediately if batch is empty
-    if (n == 0) {
+    if (inputColVector2.isRepeating) {
+      if (inputColVector2.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0]));
+        dtm.(
+            value, scratchTimestamp2, outputColVector.getScratch());
+        outputColVector.setFromScratch(0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
       return;
     }

-    if (inputColVector2.isRepeating) {
-      scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0]));
-      dtm.(
-          value, scratchTimestamp2, outputColVector.getScratch());
-      outputColVector.setFromScratch(0);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector2.noNulls) {
+    if (inputColVector2.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i]));
-          dtm.(
-              value, scratchTimestamp2, outputColVector.getScratch());
-          outputColVector.setFromScratch(i);
-        }
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputIsNull[i] = false;
+            scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i]));
+            dtm.(
+                value, scratchTimestamp2, outputColVector.getScratch());
+            outputColVector.setFromScratch(i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i]));
+            dtm.(
+                value, scratchTimestamp2, outputColVector.getScratch());
+            outputColVector.setFromScratch(i);
+          }
+        }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i]));
           dtm.(
@@ -122,24 +154,39 @@ public class extends VectorExpression {
           outputColVector.setFromScratch(i);
         }
       }
-    } else { /* there are nulls */
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
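+       *
+       * (Only the column input can introduce NULLs on this path; the scalar
+       * operand is a plain value, presumably vetted when this expression was
+       * instantiated, so the loops below mirror the column's isNull entries
+       * row by row.)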
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt index 2de3044..9db7b53 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -73,6 +74,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -86,59 +93,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth2.set((int) vector2[0]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchIntervalYearMonth2.set((int) vector2[0]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { + if (inputColVector2.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { scratchIntervalYearMonth2.set((int) vector2[i]); dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); outputColVector.setFromScratchTimestamp(i); } } - } else { /* there are nulls */ + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt index 4ed80d1..e860e4d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -71,6 +72,12 @@ public class extends 
VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -84,53 +91,91 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + value, inputColVector2.asScratch(0), outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratch(0), outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { + if (inputColVector2.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - } + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); + value, inputColVector2.asScratch(i), outputColVector.getScratch()); outputColVector.setFromScratch(i); - } } - } else { /* there are nulls */ + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
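+       *
+       * (The selected / non-selected loop pair is deliberate duplication:
+       * hoisting the batch.selectedInUse test out of the row loop keeps per-row
+       * branching off the hot path, at the cost of the near-identical bodies
+       * below.)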
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt index 6cca0bb..10f6162 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt @@ -18,10 +18,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -57,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,61 +75,84 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector2.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector2.noNulls; if (inputColVector2.noNulls) { - if (inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 
1 : 0;
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0;
-        }
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputIsNull[i] = false;
+            outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0;
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0;
+          }
+        }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0;
         }
       }
-    } else {
-      if (inputColVector2.isRepeating) {
-        //All must be selected otherwise size would be zero
-        //Repeating property will not change.
-        if (!nullPos[0]) {
-          outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0;
-          outNulls[0] = false;
-        } else {
-          outNulls[0] = true;
-        }
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
+      if (batch.selectedInUse) {
         for(int j=0; j != n; j++) {
           int i = sel[j];
-          if (!nullPos[i]) {
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
             outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0;
-            outNulls[i] = false;
           } else {
-            //comparison with null is null
-            outNulls[i] = true;
+            // Comparison with NULL is NULL.
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
       } else {
-        System.arraycopy(nullPos, 0, outNulls, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!nullPos[i]) {
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
             outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0;
+          } else {
+            // Comparison with NULL is NULL.
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
       }
diff --git ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt
index 4ab3e76..3c8f8822 100644
--- ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt
+++ ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt
@@ -48,9 +48,13 @@
         || inputColumnVector1.isRepeating && inputColumnVector2.isRepeating,
         outputColumnVector.isRepeating);
+    /*
+    We no longer set noNulls to the input ColumnVector's value since that doesn't work
+    for scratch column reuse.
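+    A reused scratch column can legitimately arrive with noNulls == false left
+    over from an earlier batch; the expression only has to keep noNulls
+    consistent with the isNull entries it writes, not mirror the input's flag,
+    so the assertion is retired rather than adjusted.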
assertEquals( "Output column vector no nulls state does not match operand columns", inputColumnVector1.noNulls && inputColumnVector2.noNulls, outputColumnVector.noNulls); + */ //if repeating, only the first value matters if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) { diff --git ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionCheckedEvaluation.txt ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionCheckedEvaluation.txt index df4f89d..c918f3b 100644 --- ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionCheckedEvaluation.txt +++ ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionCheckedEvaluation.txt @@ -46,9 +46,13 @@ "Output column vector is repeating state does not match operand column", inputColumnVector.isRepeating, outputColumnVector.isRepeating); + /* + We no longer set noNulls to the input ColumnVector's value since that doesn't work + for scratch column reuse. assertEquals( "Output column vector no nulls state does not match operand column", inputColumnVector.noNulls, outputColumnVector.noNulls); + */ if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) { for(int i = 0; i < BATCH_SIZE; i++) { diff --git ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt index e5f3f18..991135c 100644 --- ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt +++ ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt @@ -45,9 +45,13 @@ "Output column vector is repeating state does not match operand column", inputColumnVector.isRepeating, outputColumnVector.isRepeating); + /* + We no longer set noNulls to the input ColumnVector's value since that doesn't work + for scratch column reuse. assertEquals( "Output column vector no nulls state does not match operand column", inputColumnVector.noNulls, outputColumnVector.noNulls); + */ if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) { for(int i = 0; i < BATCH_SIZE; i++) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt index 733731f..fc3d01f 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt @@ -320,7 +320,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; @@ -493,9 +493,12 @@ public class extends VectorAggregateExpression { #IF PARTIAL1 ColumnVector[] fields = outputColVector.fields; + fields[AVERAGE_COUNT_FIELD_INDEX].isNull[batchIndex] = false; ((LongColumnVector) fields[AVERAGE_COUNT_FIELD_INDEX]).vector[batchIndex] = myagg.count; + fields[AVERAGE_SUM_FIELD_INDEX].isNull[batchIndex] = false; ((DoubleColumnVector) fields[AVERAGE_SUM_FIELD_INDEX]).vector[batchIndex] = myagg.sum; + // NULL out useless source field. 
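+      // (As we read it, the AVERAGE source field carries no information in this
+      // partial result; marking it repeating NULL below gives it a well-defined
+      // isNull/noNulls state at O(1) cost instead of leaving stale entries.)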
ColumnVector sourceColVector = (ColumnVector) fields[AVERAGE_SOURCE_FIELD_INDEX]; sourceColVector.isRepeating = true; sourceColVector.noNulls = false; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt index 6e42598..fa72171 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt @@ -359,7 +359,7 @@ public class extends VectorAggregateExpression { HiveDecimalWritable[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum.setFromLong(0L); @@ -519,9 +519,12 @@ public class extends VectorAggregateExpression { #IF PARTIAL1 ColumnVector[] fields = outputColVector.fields; + fields[AVERAGE_COUNT_FIELD_INDEX].isNull[batchIndex] = false; ((LongColumnVector) fields[AVERAGE_COUNT_FIELD_INDEX]).vector[batchIndex] = myagg.count; + fields[AVERAGE_SUM_FIELD_INDEX].isNull[batchIndex] = false; ((DecimalColumnVector) fields[AVERAGE_SUM_FIELD_INDEX]).vector[batchIndex].set(myagg.sum); + // NULL out useless source field. ColumnVector sourceColVector = (ColumnVector) fields[AVERAGE_SOURCE_FIELD_INDEX]; sourceColVector.isRepeating = true; sourceColVector.noNulls = false; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt index d5325c3..53dceeb 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt @@ -371,7 +371,7 @@ public class extends VectorAggregateExpression { long[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { final long value = vector[0]; for (int i = 0; i < batchSize; i++) { myagg.avgValue(value); @@ -525,10 +525,13 @@ public class extends VectorAggregateExpression { #IF PARTIAL1 ColumnVector[] fields = outputColVector.fields; + fields[AVERAGE_COUNT_FIELD_INDEX].isNull[batchIndex] = false; ((LongColumnVector) fields[AVERAGE_COUNT_FIELD_INDEX]).vector[batchIndex] = myagg.count; + fields[AVERAGE_SUM_FIELD_INDEX].isNull[batchIndex] = false; ((DecimalColumnVector) fields[AVERAGE_SUM_FIELD_INDEX]).set( batchIndex, myagg.regularDecimalSum); + // NULL out useless source field. 
ColumnVector sourceColVector = (ColumnVector) fields[AVERAGE_SOURCE_FIELD_INDEX]; sourceColVector.isRepeating = true; sourceColVector.noNulls = false; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt index 8ab393c..e273d07 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt @@ -368,7 +368,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputStructColVector.isRepeating) { - if (inputStructColVector.noNulls) { + if (inputStructColVector.noNulls || !inputStructColVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.mergeSum.setFromLong(0L); @@ -529,9 +529,12 @@ public class extends VectorAggregateExpression { #IF PARTIAL2 ColumnVector[] fields = outputColVector.fields; + fields[AVERAGE_COUNT_FIELD_INDEX].isNull[batchIndex] = false; ((LongColumnVector) fields[AVERAGE_COUNT_FIELD_INDEX]).vector[batchIndex] = myagg.mergeCount; + fields[AVERAGE_SUM_FIELD_INDEX].isNull[batchIndex] = false; ((DecimalColumnVector) fields[AVERAGE_SUM_FIELD_INDEX]).vector[batchIndex].set(myagg.mergeSum); + // NULL out useless source field. ColumnVector sourceColVector = (ColumnVector) fields[AVERAGE_SOURCE_FIELD_INDEX]; sourceColVector.isRepeating = true; sourceColVector.noNulls = false; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt index be2fadd..162d1ba 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt @@ -334,7 +334,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputStructColVector.isRepeating) { - if (inputStructColVector.noNulls) { + if (inputStructColVector.noNulls || !inputStructColVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.mergeCount = 0; @@ -507,9 +507,12 @@ public class extends VectorAggregateExpression { #IF PARTIAL2 ColumnVector[] fields = outputColVector.fields; + fields[AVERAGE_COUNT_FIELD_INDEX].isNull[batchIndex] = false; ((LongColumnVector) fields[AVERAGE_COUNT_FIELD_INDEX]).vector[batchIndex] = myagg.mergeCount; + fields[AVERAGE_SUM_FIELD_INDEX].isNull[batchIndex] = false; ((DoubleColumnVector) fields[AVERAGE_SUM_FIELD_INDEX]).vector[batchIndex] = myagg.mergeSum; + // NULL out useless source field. 
ColumnVector sourceColVector = (ColumnVector) fields[AVERAGE_SOURCE_FIELD_INDEX]; sourceColVector.isRepeating = true; sourceColVector.noNulls = false; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt index 6190a9e..abb7b22 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt @@ -317,7 +317,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; @@ -493,9 +493,12 @@ public class extends VectorAggregateExpression { #IF PARTIAL1 ColumnVector[] fields = outputColVector.fields; + fields[AVERAGE_COUNT_FIELD_INDEX].isNull[batchIndex] = false; ((LongColumnVector) fields[AVERAGE_COUNT_FIELD_INDEX]).vector[batchIndex] = myagg.count; + fields[AVERAGE_SUM_FIELD_INDEX].isNull[batchIndex] = false; ((DoubleColumnVector) fields[AVERAGE_SUM_FIELD_INDEX]).vector[batchIndex] = myagg.sum; + // NULL out useless source field. ColumnVector sourceColVector = (ColumnVector) fields[AVERAGE_SOURCE_FIELD_INDEX]; sourceColVector.isRepeating = true; sourceColVector.noNulls = false; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt index fd54256..2df45bb 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt @@ -314,7 +314,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { myagg.minmaxValue(vector[0]); } return; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt index 4764a45..9fe85d3 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt @@ -318,7 +318,7 @@ public class extends VectorAggregateExpression { HiveDecimalWritable[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls && + if ((inputVector.noNulls || !inputVector.isNull[0]) && (myagg.isNull || (myagg.value.compareTo(vector[0]) 0))) { myagg.isNull = false; myagg.value.set(vector[0]); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt index 4680161..9a0a6e7 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt @@ -307,7 +307,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls && + if ((inputColVector.noNulls || !inputColVector.isNull[0]) && (myagg.isNull || (inputColVector.compareTo(myagg.value, 0) 0))) { myagg.isNull = false; inputColVector.intervalDayTimeUpdate(myagg.value, 0); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt index 027688d..4f0b5a5 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt 
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt @@ -278,7 +278,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { + if (inputColumn.noNulls || !inputColumn.isNull[0]) { myagg.checkValue(inputColumn.vector[0], inputColumn.start[0], inputColumn.length[0]); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt index 370b6a8..579437e 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt @@ -309,7 +309,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls && + if ((inputColVector.noNulls || !inputColVector.isNull[0]) && (myagg.isNull || (inputColVector.compareTo(myagg.value, 0) 0))) { myagg.isNull = false; inputColVector.timestampUpdate(myagg.value, 0); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt index 3e3d070..c731869 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt @@ -311,7 +311,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt index cb9c962..876ead5 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt @@ -311,7 +311,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { iterateRepeatingNoNulls(myagg, vector[0], batchSize); } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt index 3d03c09..cf19b14 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt @@ -311,7 +311,7 @@ public class extends VectorAggregateExpression { HiveDecimalWritable[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { iterateRepeatingNoNulls(myagg, vector[0], inputVector.scale, batchSize); } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt index 570d771..9b1c1cd 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt @@ -371,7 +371,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputStructColVector.isRepeating) { - if (inputStructColVector.noNulls) { + if (inputStructColVector.noNulls || !inputStructColVector.isNull[0]) { final long count = countVector[0]; final double sum = sumVector[0]; final double variance = varianceVector[0]; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt 
ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt index d6cd505..1dd5ab4 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt @@ -290,7 +290,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { iterateRepeatingNoNulls(myagg, inputColVector.getDouble(0), batchSize); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index 5db3b07..bedc12a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -210,12 +210,14 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBa if (inColVector.isRepeating) { if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.isNull[outBatchIndex] = false; outColVector.setElement(outBatchIndex, 0, inColVector); } else { VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); } } else { if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.isNull[outBatchIndex] = false; outColVector.setElement(outBatchIndex, inBatchIndex, inColVector); } else { VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); @@ -237,12 +239,14 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBa if (inColVector.isRepeating) { if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.isNull[outBatchIndex] = false; outColVector.setElement(outBatchIndex, 0, inColVector); } else { VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); } } else { if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.isNull[outBatchIndex] = false; outColVector.setElement(outBatchIndex, inBatchIndex, inColVector); } else { VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java index 5d1db4d..6ae6727 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java @@ -150,7 +150,7 @@ public void copyGroupKey(VectorizedRowBatch inputBatch, VectorizedRowBatch outpu TimestampColumnVector inputColumnVector = (TimestampColumnVector) inputBatch.cols[inputColumnNum]; TimestampColumnVector outputColumnVector = (TimestampColumnVector) outputBatch.cols[outputColumnNum]; if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { - + outputColumnVector.isNull[outputBatch.size] = false; outputColumnVector.setElement(outputBatch.size, 0, inputColumnVector); } else { outputColumnVector.noNulls = false; @@ -163,7 +163,7 @@ public void copyGroupKey(VectorizedRowBatch inputBatch, VectorizedRowBatch outpu IntervalDayTimeColumnVector inputColumnVector = (IntervalDayTimeColumnVector) inputBatch.cols[inputColumnNum]; IntervalDayTimeColumnVector outputColumnVector = (IntervalDayTimeColumnVector) outputBatch.cols[outputColumnNum]; if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { - + outputColumnVector.isNull[outputBatch.size] = false; outputColumnVector.setElement(outputBatch.size, 0, 
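Editorial note: the VectorCopyRow and VectorGroupKeyHelper hunks add an explicit `isNull[outBatchIndex] = false` on the value path. Without it, a destination slot left over from an earlier batch can keep a stale `isNull == true` even though a real value was just copied in. The copy contract in isolation (hypothetical helper name):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class CopyCellSketch {
      // Minimal sketch of the per-cell copy the patch enforces: clear the
      // destination's stale isNull entry before writing the value.
      static void copyCell(LongColumnVector in, int inIndex, LongColumnVector out, int outIndex) {
        int src = in.isRepeating ? 0 : inIndex;
        if (in.noNulls || !in.isNull[src]) {
          out.isNull[outIndex] = false;   // a stale true here would shadow the copied value
          out.vector[outIndex] = in.vector[src];
        } else {
          out.isNull[outIndex] = true;
          out.noNulls = false;
        }
      }
    }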
inputColumnVector); } else { outputColumnVector.noNulls = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java index 74ca9d6..2b401ac 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java @@ -110,7 +110,7 @@ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { assignLongNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignLongNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignLongNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignLongNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -132,7 +132,7 @@ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { assignDoubleNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignDoubleNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignDoubleNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignDoubleNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -154,7 +154,7 @@ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { assignStringNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignStringNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignStringNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignStringNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -176,7 +176,7 @@ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { assignDecimalNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignDecimalNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignDecimalNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignDecimalNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -198,7 +198,7 @@ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { assignTimestampNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && 
!columnVector.isRepeating && batch.selectedInUse) { assignTimestampNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignTimestampNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignTimestampNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -220,7 +220,7 @@ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { assignIntervalDayTimeNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignIntervalDayTimeNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignIntervalDayTimeNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignIntervalDayTimeNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -262,7 +262,7 @@ public void evaluateBatchGroupingSets(VectorizedRowBatch batch, assignLongNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignLongNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignLongNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignLongNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -291,7 +291,7 @@ public void evaluateBatchGroupingSets(VectorizedRowBatch batch, assignDoubleNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignDoubleNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignDoubleNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignDoubleNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -320,7 +320,7 @@ public void evaluateBatchGroupingSets(VectorizedRowBatch batch, assignStringNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignStringNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignStringNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignStringNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -349,7 +349,7 @@ public void evaluateBatchGroupingSets(VectorizedRowBatch batch, 
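Editorial note: the VectorHashKeyWrapperBatch hunks repeat the amended predicate `(columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating` a dozen times across the type-specific branch chains. A shared predicate would keep the copies from drifting; a hypothetical refactoring, not part of the patch:

    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

    public final class ColumnVectorNulls {
      private ColumnVectorNulls() {}

      // True when a repeating column carries a real (non-NULL) value in row 0.
      public static boolean isRepeatingNotNull(ColumnVector cv) {
        return cv.isRepeating && (cv.noNulls || !cv.isNull[0]);
      }
    }

Each branch would then read `} else if (ColumnVectorNulls.isRepeatingNotNull(columnVector)) {`, making the null-aware repeating case self-describing.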
assignDecimalNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignDecimalNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignDecimalNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignDecimalNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -378,7 +378,7 @@ public void evaluateBatchGroupingSets(VectorizedRowBatch batch, assignTimestampNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignTimestampNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignTimestampNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignTimestampNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); @@ -407,7 +407,7 @@ public void evaluateBatchGroupingSets(VectorizedRowBatch batch, assignIntervalDayTimeNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { assignIntervalDayTimeNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); - } else if (columnVector.noNulls && columnVector.isRepeating) { + } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) { assignIntervalDayTimeNoNullsRepeating(i, batch.size, columnVector); } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { assignIntervalDayTimeNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index d1b52c6..4df6e97 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2942,6 +2942,20 @@ private VectorExpression getWhenExpression(List childExpr, childExpr.subList(2, childExpr.size())); } + if (isNullConst(thenDesc) && isNullConst(elseDesc)) { + + // THEN NULL ELSE NULL: An unusual "case", but possible. 
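Editorial note: the hunk resuming below constructs a new IfExprNullNull vector expression for this degenerate `CASE WHEN ... THEN NULL ELSE NULL` shape. The class ships with this patch and is not shown in the excerpt; a plausible sketch of its core behavior (class and field names assumed, details may differ):

    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class IfExprNullNullSketch {
      private final int outputColumnNum;

      public IfExprNullNullSketch(int outputColumnNum) {
        this.outputColumnNum = outputColumnNum;
      }

      // Every row of the result is NULL regardless of the WHEN condition, so the
      // cheapest correct output is a repeating NULL column.
      public void evaluate(VectorizedRowBatch batch) {
        ColumnVector out = batch.cols[outputColumnNum];
        out.isRepeating = true;
        out.noNulls = false;
        out.isNull[0] = true;
      }
    }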
+ final int outputColumnNum = ocm.allocateOutputColumn(returnType); + + final VectorExpression resultExpr = + new IfExprNullNull( + outputColumnNum); + + resultExpr.setOutputTypeInfo(returnType); + resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + return resultExpr; + } if (isNullConst(thenDesc)) { final VectorExpression whenExpr = getVectorExpression(whenDesc, mode); final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 66de847..44b7c95 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -97,19 +97,6 @@ public static void setNullColIsNullValue(ColumnVector cv, int rowIndex) { } /** - * Iterates thru all the column vectors and sets noNull to - * specified value. - * - * @param batch - * Batch on which noNull is set - */ - public static void setNoNullFields(VectorizedRowBatch batch) { - for (int i = 0; i < batch.numCols; i++) { - batch.cols[i].noNulls = true; - } - } - - /** * Iterates thru all the column vectors and sets repeating to * specified column. * diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 0e703a5..ff55f50 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -423,7 +423,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Boolean) value == true ? 
1 : 0); - lcv.isNull[0] = false; } } break; @@ -436,7 +435,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Byte) value); - lcv.isNull[0] = false; } } break; @@ -449,7 +447,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Short) value); - lcv.isNull[0] = false; } } break; @@ -462,7 +459,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Integer) value); - lcv.isNull[0] = false; } } break; @@ -475,7 +471,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Long) value); - lcv.isNull[0] = false; } } break; @@ -488,7 +483,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill(DateWritable.dateToDays((Date) value)); - lcv.isNull[0] = false; } } break; @@ -501,7 +495,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Timestamp) value); - lcv.isNull[0] = false; } } break; @@ -514,7 +507,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill(((HiveIntervalYearMonth) value).getTotalMonths()); - lcv.isNull[0] = false; } } @@ -526,7 +518,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue icv.isRepeating = true; } else { icv.fill(((HiveIntervalDayTime) value)); - icv.isNull[0] = false; } } @@ -538,7 +529,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue dcv.isRepeating = true; } else { dcv.fill((Float) value); - dcv.isNull[0] = false; } } break; @@ -551,7 +541,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue dcv.isRepeating = true; } else { dcv.fill((Double) value); - dcv.isNull[0] = false; } } break; @@ -563,10 +552,7 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue dv.isNull[0] = true; dv.isRepeating = true; } else { - HiveDecimal hd = (HiveDecimal) value; - dv.set(0, hd); - dv.isRepeating = true; - dv.isNull[0] = false; + dv.fill((HiveDecimal) value); } } break; @@ -580,7 +566,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue bcv.isRepeating = true; } else { bcv.fill(bytes); - bcv.isNull[0] = false; } } break; @@ -595,8 +580,7 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue bcv.isNull[0] = true; bcv.isRepeating = true; } else { - bcv.setVal(0, sVal.getBytes()); - bcv.isRepeating = true; + bcv.fill(sVal.getBytes()); } } break; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java index c15bdc1..f1a584e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,7 +60,6 @@ public void 
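Editorial note: the VectorizedRowBatchCtx hunks above delete the `isNull[0] = false` that used to follow each `fill(...)` and route the decimal and string cases through `fill` as well. That is sound only if every ColumnVector.fill implementation owns the null state of the repeating slot itself. The assumed contract, written out (an assumption about storage-api behavior, not a quote of it):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class FillContractSketch {
      // Assumed behavior of LongColumnVector.fill(long): fill() establishes the
      // complete repeating-value state on its own, which is what makes the
      // deleted "lcv.isNull[0] = false" lines redundant.
      static void fillLikeStorageApi(LongColumnVector cv, long value) {
        cv.isRepeating = true;
        cv.noNulls = true;
        cv.isNull[0] = false;   // the repeating slot is a concrete value
        cv.vector[0] = value;
      }
    }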
evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -67,39 +68,73 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setDays(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; + setDays(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - setDays(outputColVector, vector, i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputIsNull[i] = false; + // Set isNull before call in case it changes its mind. + setDays(outputColVector, vector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + setDays(outputColVector, vector, i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { + // Set isNull before call in case it changes its mind. setDays(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setDays(outputColVector, vector, i); + // Set isNull before call in case it changes its mind. outputIsNull[i] = inputIsNull[i]; + setDays(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind.
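Editorial note: the recurring "Set isNull before the call" comment is doing real work. The per-row setter may itself discover a NULL (for example a failed parse in the string casts further down) and write `isNull[i] = true` plus `noNulls = false`; clearing `isNull[i]` after the call would erase that decision. A standalone illustration (hypothetical names):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class SetIsNullOrderingSketch {
      // Hypothetical per-row setter that can "change its mind" and NULL the row,
      // like the parse-failure paths in the string casts in this patch.
      static void trySet(LongColumnVector out, int i, String s) {
        try {
          out.vector[i] = Long.parseLong(s);
        } catch (NumberFormatException e) {
          out.isNull[i] = true;   // the setter overrides the caller's optimism
          out.noNulls = false;
        }
      }

      static void perRow(LongColumnVector out, int i, String s) {
        out.isNull[i] = false;    // BEFORE the call: safe, the setter may overwrite it
        trySet(out, i, s);
        // Writing out.isNull[i] = false HERE instead would clobber a parse-failure NULL.
      }
    }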
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { setDays(outputColVector, vector, i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java index a2e4a52..8326002 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -52,9 +54,9 @@ public CastDecimalToDecimal() { * Convert input decimal value to a decimal with a possibly different precision and scale, * at position i in the respective vectors. */ - protected void convert(DecimalColumnVector outV, DecimalColumnVector inV, int i) { + protected void convert(DecimalColumnVector outputColVector, DecimalColumnVector inputColVector, int i) { // The set routine enforces precision and scale. - outV.vector[i].set(inV.vector[i]); + outputColVector.vector[i].set(inputColVector.vector[i]); } /** @@ -70,10 +72,12 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; + DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -81,51 +85,82 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - convert(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - convert(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + outputColVector.isRepeating = true; + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + // Set isNull before call in case it changes its mind. + outputColVector.isNull[0] = false; + convert(outputColVector, inputColVector, 0); + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + return; + } + + if (inputColVector.noNulls) { + + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + convert(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + convert(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls.
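Editorial note: the bulk reset the comment above introduces is a deliberate trade. In the unselected no-nulls path the patch does not clear isNull row by row; when the output column still carries noNulls == false from a previous batch, it clears the whole array once and flips noNulls back to true, restoring the invariant in one pass so the value loop can skip isNull entirely. The idiom in isolation (hypothetical class name):

    import java.util.Arrays;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class BulkClearSketch {
      // One Arrays.fill re-establishes "noNulls == true implies no true entries
      // in isNull", so the subsequent value loop needs no per-row null writes.
      static void prepareNoNullsOutput(LongColumnVector out) {
        if (!out.noNulls) {
          Arrays.fill(out.isNull, false);
          out.noNulls = true;
        }
      }
    }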
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - convert(outV, inV, i); + convert(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - convert(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - convert(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes its mind. + outputColVector.isNull[i] = false; + convert(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - convert(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes its mind. + outputColVector.isNull[i] = false; + convert(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java index aa529ed..7ad0493 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java @@ -64,6 +64,7 @@ protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { outV.noNulls = false; return; } + outV.isNull[i] = false; switch (integerPrimitiveCategory) { case BYTE: outV.vector[i] = decWritable.byteValue(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java index 08abf27..5494579 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] vector = inputColVector.vector; @@ -65,39 +66,82 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change.
- setDouble(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + setDouble(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - setDouble(outputColVector, vector, i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + setDouble(outputColVector, vector, i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { setDouble(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setDouble(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - setDouble(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java index df25eac..a3c4212 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -65,39 +66,80 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output.
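Editorial note: every rewritten evaluate() in this patch follows the same four-branch shape: repeating input, no-nulls with a selection vector, no-nulls without one, and input-with-NULLs. Abstracted into a standalone sketch, with setValue standing in for setDays/setSeconds/setMilliseconds/setDouble/convert/func (the real methods differ only in per-branch micro-optimizations):

    import java.util.Arrays;
    import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class CastEvaluateSkeleton {
      static void setValue(DoubleColumnVector out, long[] in, int i) { out.vector[i] = in[i]; }

      static void evaluate(LongColumnVector input, DoubleColumnVector output, VectorizedRowBatch batch) {
        int n = batch.size;
        if (n == 0) {
          return;
        }
        output.isRepeating = false;            // no full reset; flags are maintained by hand

        if (input.isRepeating) {               // 1) repeating input
          if (input.noNulls || !input.isNull[0]) {
            output.isNull[0] = false;
            setValue(output, input.vector, 0);
          } else {
            output.isNull[0] = true;
            output.noNulls = false;
          }
          output.isRepeating = true;
          return;
        }
        if (input.noNulls) {
          if (batch.selectedInUse) {           // 2) no nulls, selected rows only
            for (int j = 0; j != n; j++) {
              int i = batch.selected[j];
              output.isNull[i] = false;        // simplified: the patch skips this when noNulls holds
              setValue(output, input.vector, i);
            }
          } else {                             // 3) no nulls, all rows
            if (!output.noNulls) {
              Arrays.fill(output.isNull, false);
              output.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
              setValue(output, input.vector, i);
            }
          }
        } else {                               // 4) input has NULLs
          output.noNulls = false;
          for (int j = 0; j != n; j++) {
            int i = batch.selectedInUse ? batch.selected[j] : j;
            if (!input.isNull[i]) {
              output.isNull[i] = false;
              setValue(output, input.vector, i);
            } else {
              output.isNull[i] = true;
            }
          }
        }
      }
    }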
+ outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setSeconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; + setSeconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - setSeconds(outputColVector, vector, i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + setSeconds(outputColVector, vector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + setSeconds(outputColVector, vector, i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { setSeconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setSeconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + setSeconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before calls in case they change their mind.
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + outputColVector.noNulls = false; for(int i = 0; i != n; i++) { - setSeconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + setSeconds(outputColVector, vector, i); + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java index 42c34c8..6a29c62 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -54,7 +56,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -63,39 +64,84 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setMilliseconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; + setMilliseconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - setMilliseconds(outputColVector, vector, i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + setMilliseconds(outputColVector, vector, i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { setMilliseconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setMilliseconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes its mind.
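Editorial note: two NULL-branch styles coexist in these hunks. CastDoubleToTimestamp discovers NULLs per element and only drops noNulls when one is actually seen; CastLongToTimestamp bulk-copies the null mask and then guards the value calls, which is why the conservative noNulls clear added above is needed: the copied range may contain true entries. Both styles in isolation (hypothetical class, long-to-long stand-in for the timestamp setters):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class NullBranchStyles {
      // Style A: discover NULLs per element; noNulls survives when the batch's
      // rows happen to contain none.
      static void perElement(LongColumnVector in, LongColumnVector out, int n) {
        for (int i = 0; i != n; i++) {
          if (!in.isNull[i]) {
            out.isNull[i] = false;
            out.vector[i] = in.vector[i] * 1000L;   // stand-in for setSeconds/setMilliseconds
          } else {
            out.isNull[i] = true;
            out.noNulls = false;                    // dropped only when a NULL is actually seen
          }
        }
      }

      // Style B: copy the null mask wholesale, then guard the value calls; noNulls
      // must be cleared unconditionally since the copied range may hold true entries.
      static void bulkCopy(LongColumnVector in, LongColumnVector out, int n) {
        System.arraycopy(in.isNull, 0, out.isNull, 0, n);
        out.noNulls = false;
        for (int i = 0; i != n; i++) {
          if (!in.isNull[i]) {
            out.vector[i] = in.vector[i] * 1000L;
          }
        }
      }
    }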
+ outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before calls in case they change their mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - setMilliseconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index 34269da..b55712a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -28,6 +28,7 @@ import org.apache.hive.common.util.DateParser; import java.nio.charset.StandardCharsets; +import java.util.Arrays; /** * Casts a string vector to a date vector. @@ -62,7 +63,10 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -70,65 +74,94 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + evaluate(outputColVector, inV, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - evaluate(outV, inV, i); + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + evaluate(outputColVector, inV, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + evaluate(outputColVector, inV, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - evaluate(outV, inV, i); + evaluate(outputColVector, inV, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. 
- outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + evaluate(outputColVector, inV, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + evaluate(outputColVector, inV, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } - private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) { + private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) { String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8); if (dateParser.parseDate(dateString, sqlDate)) { - outV.vector[i] = DateWritable.dateToDays(sqlDate); + outputColVector.vector[i] = DateWritable.dateToDays(sqlDate); return; } - outV.vector[i] = 1; - outV.isNull[i] = true; - outV.noNulls = false; + outputColVector.vector[i] = 1; + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java index 41443c5..cbefa80 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; @@ -51,7 +53,7 @@ public CastStringToDecimal() { /** * Convert input string to a decimal, at position i in the respective vectors. */ - protected void func(DecimalColumnVector outV, BytesColumnVector inV, int i) { + protected void func(DecimalColumnVector outputColVector, BytesColumnVector inputColVector, int i) { String s; try { @@ -59,13 +61,13 @@ protected void func(DecimalColumnVector outV, BytesColumnVector inV, int i) { * e.g. by converting to decimal from the input bytes directly without * making a new string. 
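Editorial note: the private evaluate in the CastStringToDate hunk above shows the failure pattern the "set isNull before the call" convention protects: on a bad parse the helper marks the row NULL, drops noNulls, and still writes a placeholder value beneath the NULL so downstream code reading the (undefined) value of a NULL row sees deterministic data. The same shape, standalone (hypothetical names):

    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class ParseToNullSketch {
      static void parseLong(BytesColumnVector in, LongColumnVector out, int i) {
        String s = new String(in.vector[i], in.start[i], in.length[i], StandardCharsets.UTF_8);
        try {
          out.vector[i] = Long.parseLong(s.trim());
        } catch (NumberFormatException e) {
          out.vector[i] = 1;      // placeholder; the value under a NULL must never be trusted
          out.isNull[i] = true;
          out.noNulls = false;
        }
      }
    }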
*/ - s = new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"); - outV.vector[i].set(HiveDecimal.create(s)); + s = new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8"); + outputColVector.vector[i].set(HiveDecimal.create(s)); } catch (Exception e) { // for any exception in conversion to decimal, produce NULL - outV.noNulls = false; - outV.isNull[i] = true; + outputColVector.noNulls = false; + outputColVector.isNull[i] = true; } } @@ -76,10 +78,13 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -87,51 +92,82 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - func(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - func(outV, inV, i); + func(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java index 3ea1e8c..9ad442a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -51,25 +53,25 @@ public CastStringToDouble() { /** * Convert input string to a double, at position i in the respective vectors. */ - protected void func(DoubleColumnVector outV, BytesColumnVector inV, int batchIndex) { + protected void func(DoubleColumnVector outputColVector, BytesColumnVector inputColVector, int batchIndex) { - byte[] bytes = inV.vector[batchIndex]; - final int start = inV.start[batchIndex]; - final int length = inV.length[batchIndex]; + byte[] bytes = inputColVector.vector[batchIndex]; + final int start = inputColVector.start[batchIndex]; + final int length = inputColVector.length[batchIndex]; try { if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; - outV.vector[batchIndex] = DoubleColumnVector.NULL_VALUE; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; + outputColVector.vector[batchIndex] = DoubleColumnVector.NULL_VALUE; return; } - outV.vector[batchIndex] = StringToDouble.strtod(bytes, start, length); + outputColVector.vector[batchIndex] = StringToDouble.strtod(bytes, start, length); } catch (Exception e) { // for any exception in conversion to integer, produce NULL - outV.noNulls = false; - outV.isNull[batchIndex] = true; - outV.vector[batchIndex] = DoubleColumnVector.NULL_VALUE; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; + outputColVector.vector[batchIndex] = DoubleColumnVector.NULL_VALUE; } } @@ -80,10 +82,13 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum]; + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -91,51 +96,82 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if 
(inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - func(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - func(outV, inV, i); + func(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java index feb0ab6..8a64dcf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -55,10 +57,13 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - IntervalDayTimeColumnVector outV = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; + IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -66,64 +71,88 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - evaluate(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + evaluate(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + evaluate(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + evaluate(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - evaluate(outV, inV, i); + evaluate(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. 
- outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + // Set isNull before call in case it changes its mind. + outputColVector.isNull[i] = inputColVector.isNull[i]; + if (!inputColVector.isNull[i]) { + evaluate(outputColVector, inputColVector, i); } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + // Set isNull before calls in case they change their mind. + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputColVector.isNull[i]) { + evaluate(outputColVector, inputColVector, i); } } - outV.isRepeating = false; } } } - private void evaluate(IntervalDayTimeColumnVector outV, BytesColumnVector inV, int i) { + private void evaluate(IntervalDayTimeColumnVector outputColVector, BytesColumnVector inputColVector, int i) { try { HiveIntervalDayTime interval = HiveIntervalDayTime.valueOf( - new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8")); - outV.set(i, interval); + new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8")); + outputColVector.set(i, interval); } catch (Exception e) { - outV.setNullValue(i); - outV.isNull[i] = true; - outV.noNulls = false; + outputColVector.setNullValue(i); + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java index 09dd4d9..598113f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -53,10 +55,13 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -64,64 +69,96 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - evaluate(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; + evaluate(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + evaluate(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + evaluate(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - evaluate(outV, inV, i); + evaluate(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + evaluate(outputColVector, inputColVector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + // Set isNull before calls in case they change their mind. + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false; + evaluate(outputColVector, inputColVector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } - private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) { + private void evaluate(LongColumnVector outputColVector, BytesColumnVector inputColVector, int i) { try { HiveIntervalYearMonth interval = HiveIntervalYearMonth.valueOf( - new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8")); - outV.vector[i] = interval.getTotalMonths(); + new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8")); + outputColVector.vector[i] = interval.getTotalMonths(); } catch (Exception e) { - outV.vector[i] = 1; - outV.isNull[i] = true; - outV.noNulls = false; + outputColVector.vector[i] = 1; + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java index a6cfee8..e3da77e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -70,11 +72,11 @@ public void transientInit() throws HiveException { /** * Convert input string to a long, at position i in the respective vectors. */ - protected void func(LongColumnVector outV, BytesColumnVector inV, int batchIndex) { + protected void func(LongColumnVector outputColVector, BytesColumnVector inputColVector, int batchIndex) { - byte[] bytes = inV.vector[batchIndex]; - final int start = inV.start[batchIndex]; - final int length = inV.length[batchIndex]; + byte[] bytes = inputColVector.vector[batchIndex]; + final int start = inputColVector.start[batchIndex]; + final int length = inputColVector.length[batchIndex]; try { switch (integerPrimitiveCategory) { @@ -90,8 +92,8 @@ protected void func(LongColumnVector outV, BytesColumnVector inV, int batchIndex booleanValue = true; } else { // No boolean value match for 4 char field. - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } } else if (length == 5) { @@ -103,8 +105,8 @@ protected void func(LongColumnVector outV, BytesColumnVector inV, int batchIndex booleanValue = false; } else { // No boolean value match for 5 char field. - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } } else if (length == 1) { @@ -115,50 +117,50 @@ protected void func(LongColumnVector outV, BytesColumnVector inV, int batchIndex booleanValue = false; } else { // No boolean value match for extended 1 char field. - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } } else { // No boolean value match for other lengths. - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = (booleanValue ? 
1 : 0); + outputColVector.vector[batchIndex] = (booleanValue ? 1 : 0); } break; case BYTE: if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = LazyByte.parseByte(bytes, start, length, 10); + outputColVector.vector[batchIndex] = LazyByte.parseByte(bytes, start, length, 10); break; case SHORT: if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = LazyShort.parseShort(bytes, start, length, 10); + outputColVector.vector[batchIndex] = LazyShort.parseShort(bytes, start, length, 10); break; case INT: if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = LazyInteger.parseInt(bytes, start, length, 10); + outputColVector.vector[batchIndex] = LazyInteger.parseInt(bytes, start, length, 10); break; case LONG: if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = LazyLong.parseLong(bytes, start, length, 10); + outputColVector.vector[batchIndex] = LazyLong.parseLong(bytes, start, length, 10); break; default: throw new Error("Unexpected primitive category " + integerPrimitiveCategory); @@ -166,8 +168,8 @@ protected void func(LongColumnVector outV, BytesColumnVector inV, int batchIndex } catch (Exception e) { // for any exception in conversion to integer, produce NULL - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; } } @@ -178,10 +180,13 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -189,51 +194,81 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - func(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
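One way the CONSIDER note could be realized, shown here only as a sketch (FILL_THRESHOLD is a hypothetical tuning constant, not something this patch defines): pay one bulk clear of isNull, restore noNulls, and then run the branch-free loop.

    // Hedged sketch of the CONSIDER alternative for the selected-in-use case.
    if (!outputColVector.noNulls && n > FILL_THRESHOLD) {
      java.util.Arrays.fill(outputIsNull, false);  // one bulk clear instead of n scattered stores
      outputColVector.noNulls = true;              // the invariant holds again
    }
    if (outputColVector.noNulls) {
      for (int j = 0; j != n; j++) {
        final int i = sel[j];
        func(outputColVector, inputColVector, i);  // tight loop, no per-row isNull store
      }
    } else {
      for (int j = 0; j != n; j++) {
        final int i = sel[j];
        outputIsNull[i] = false;
        func(outputColVector, inputColVector, i);
      }
    }

Whether the fill wins depends on how sparse the selection is, which is presumably why the patch leaves it as a CONSIDER rather than implementing it.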
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - func(outV, inV, i); + func(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { + } else /* there are NULLs in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes its mind. + outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes its mind. + outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java index 1231cda..1836131 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -65,39 +66,51 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = toBool(inputColVector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = toBool(inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = toBool(inputColVector, i); + outputIsNull[i] = false; + outputVector[i] = toBool(inputColVector, i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = toBool(inputColVector, i); outputIsNull[i] = inputIsNull[i]; + outputVector[i] = toBool(inputColVector, i); } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java index e696455..c11797b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -51,7 +53,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] outputVector = outputColVector.vector; @@ -60,39 +61,79 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.getDouble(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getDouble(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector.getDouble(i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = inputColVector.getDouble(i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getDouble(i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getDouble(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getDouble(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java index 36b9f13..a0f0927 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -48,7 +50,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -57,39 +58,79 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.getTimestampAsLong(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getTimestampAsLong(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector.getTimestampAsLong(i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
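All of these fast paths lean on one contract: whenever noNulls is true, every isNull entry addressable through the batch must already be false. A hypothetical assertion helper (useful in tests, not part of this patch) states the contract directly:

    // Hedged sketch: verify the noNulls/isNull contract for one column of a batch.
    static void assertNullInvariant(ColumnVector cv, VectorizedRowBatch batch) {
      if (!cv.noNulls) {
        return;  // NULLs allowed; the isNull array is authoritative
      }
      final int size = cv.isRepeating ? 1 : batch.size;
      for (int j = 0; j != size; j++) {
        final int i = (!cv.isRepeating && batch.selectedInUse) ? batch.selected[j] : j;
        assert !cv.isNull[i] : "noNulls set but isNull[" + i + "] is true";
      }
    }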
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } + } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getTimestampAsLong(i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getTimestampAsLong(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getTimestampAsLong(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java index 127e431..6fb29a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -66,104 +68,121 @@ public void evaluate(VectorizedRowBatch batch) { return; } + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + long vector1Value = vector1[0]; long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { + if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change.
outV.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = vector1[0] & vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1Value & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2Value; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { + return; + } + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outV.noNulls = false; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; + outputIsNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } else { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + 
outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } - outV.noNulls = false; } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { // only input 1 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -171,49 +190,46 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); + outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } - outV.isRepeating = false; } outV.noNulls = false; } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ @@ -223,7 +239,7 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. 
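The three-term isNull expression just below encodes SQL's three-valued AND: FALSE AND anything is FALSE, never NULL, so a null operand only produces a null result when the other side is TRUE or itself null. Restated with per-row values v1/v2 and null flags isNull1/isNull2 (a restatement of the formula that follows, not new logic):

    //   FALSE AND NULL  -> FALSE  (result not null; the 0 operand decides)
    //   TRUE  AND NULL  -> NULL   ((v1 == 1) && isNull2)
    //   NULL  AND TRUE  -> NULL   (isNull1 && (v2 == 1))
    //   NULL  AND NULL  -> NULL   (isNull1 && isNull2)
    boolean resultIsNull = ((v1 == 1) && isNull2)
        || (isNull1 && (v2 == 1))
        || (isNull1 && isNull2);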
outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) + outputIsNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { @@ -231,32 +247,31 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } @@ -267,21 +282,19 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } - outV.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java index 3542a07..9208cd4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -69,104 +71,120 @@ public void evaluate(VectorizedRowBatch batch) { return; } + boolean[] outputIsNull = outV.isNull; + + // We 
do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + long vector1Value = vector1[0]; long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { + // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = vector1[0] | vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1Value | vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] | vector2Value; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] | vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { + return; + } + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outV.noNulls = false; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. 
outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; + outputIsNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } - outV.noNulls = false; } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { // only input 1 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -174,51 +192,47 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. 
outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); + outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } - outV.isRepeating = false; } - outV.noNulls = false; } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ // either input 1 or input 2 may have nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -226,7 +240,7 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. 
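ColOrCol's formula below is the dual: TRUE OR anything is TRUE, so nullness only survives against a FALSE or null operand, hence the (vX == 0) tests. With the same v1/v2 and isNull1/isNull2 shorthand:

    //   TRUE  OR NULL  -> TRUE   (result not null; the 1 operand decides)
    //   FALSE OR NULL  -> NULL   ((v1 == 0) && isNull2)
    //   NULL  OR FALSE -> NULL   (isNull1 && (v2 == 0))
    //   NULL  OR NULL  -> NULL   (isNull1 && isNull2)
    boolean resultIsNull = ((v1 == 0) && isNull2)
        || (isNull1 && (v2 == 0))
        || (isNull1 && isNull2);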
outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) + outputIsNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { @@ -234,57 +248,53 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } - outV.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index c7cab2a..5b89131 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -121,70 +121,84 @@ public ConstantVectorExpression(int outputColumnNum, TypeInfo outputTypeInfo, bo isNullValue = isNull; } + /* + * In the following evaluate* methods, since we are supporting scratch column reuse, we must + * assume the column may have noNulls set to false and some isNull entries true. + * + * So, do proper assignments.
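The reuse hazard is concrete. Suppose a previous batch left isNull[0] = true in the scratch column; a non-null constant that only wrote the value, as the old code effectively did once noNulls was assumed, would still read back as NULL. A sketch of the failure mode with a reused scratch LongColumnVector cv (hypothetical batches, not code from this patch):

    cv.isRepeating = true;
    cv.vector[0] = longValue;   // writing the value alone is not enough...
    cv.isNull[0] = false;       // ...this clear is required after scratch reuse

That explicit clear is exactly the "proper assignment" the comment above calls for.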
+ */ + private void evaluateLong(VectorizedRowBatch vrg) { + LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { + cv.isNull[0] = false; cv.vector[0] = longValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDouble(VectorizedRowBatch vrg) { DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { + cv.isNull[0] = false; cv.vector[0] = doubleValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateBytes(VectorizedRowBatch vrg) { BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; cv.initBuffer(); if (!isNullValue) { + cv.isNull[0] = false; cv.setVal(0, bytesValue, 0, bytesValueLength); } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDecimal(VectorizedRowBatch vrg) { DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.vector[0].set(decimalValue); + dcv.isNull[0] = false; + dcv.set(0, decimalValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } private void evaluateTimestamp(VectorizedRowBatch vrg) { - TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; - dcv.isRepeating = true; - dcv.noNulls = !isNullValue; + TimestampColumnVector tcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; + tcv.isRepeating = true; if (!isNullValue) { - dcv.set(0, timestampValue); + tcv.isNull[0] = false; + tcv.set(0, timestampValue); } else { - dcv.isNull[0] = true; + tcv.isNull[0] = true; + tcv.noNulls = false; } } private void evaluateIntervalDayTime(VectorizedRowBatch vrg) { IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { + dcv.isNull[0] = false; dcv.set(0, intervalDayTimeValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java index d91b09c..7342d9e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java @@ -82,12 +82,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
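Row by row, the output null mask of a two-input operator is the OR of the input masks. Ignoring the repeating and selection fast paths that the NullUtil helper also handles, the propagation amounts to roughly this (a simplification for illustration, not NullUtil's actual body):

    outputColVector.noNulls = inputColVector1.noNulls && inputColVector2.noNulls;
    if (!outputColVector.noNulls) {
      for (int i = 0; i != n; i++) {
        outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
      }
    }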
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java index 2699681..3ea189a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -73,8 +74,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; int n = batch.size; long[] vector1 = inputColVector1.vector; @@ -83,43 +82,81 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector1.isRepeating) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); - outputColVector.setFromScratchIntervalDayTime(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(0); + + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } + + if (inputColVector1.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); - outputColVector.setFromScratchIntervalDayTime(i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls.
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java index 946b738..a87ae39 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -75,8 +76,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; int n = batch.size; long[] vector2 = inputColVector2.vector; @@ -86,43 +85,61 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector2.isRepeating) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); - outputColVector.setFromScratchIntervalDayTime(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } + + if (inputColVector2.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } - } else { /* there are nulls */ + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java index 9a8177c..a677f90 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java @@ -24,10 +24,9 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import java.sql.Timestamp; import java.util.Arrays; import java.util.HashSet; @@ -80,8 +79,8 @@ public void evaluate(VectorizedRowBatch batch) { DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColumnVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColumnVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; HiveDecimalWritable[] vector = inputColumnVector.vector; long[] outputVector = outputColVector.vector; @@ -91,49 +90,68 @@ 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
index 9a8177c..a677f90 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
@@ -24,10 +24,9 @@
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.util.DateTimeMath;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 
-import java.sql.Timestamp;
 import java.util.Arrays;
 import java.util.HashSet;
@@ -80,8 +79,8 @@ public void evaluate(VectorizedRowBatch batch) {
     DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn];
     LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    boolean[] nullPos = inputColumnVector.isNull;
-    boolean[] outNulls = outputColVector.isNull;
+    boolean[] inputIsNull = inputColumnVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
     int n = batch.size;
     HiveDecimalWritable[] vector = inputColumnVector.vector;
     long[] outputVector = outputColVector.vector;
@@ -91,49 +90,68 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
     outputColVector.isRepeating = false;
-    outputColVector.noNulls = inputColumnVector.noNulls;
-    if (inputColumnVector.noNulls) {
-      if (inputColumnVector.isRepeating) {
-        // All must be selected otherwise size would be zero
-        // Repeating property will not change.
+    if (inputColumnVector.isRepeating) {
+      if (inputColumnVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
         outputVector[0] = inSet.contains(vector[0]) ? 1 : 0;
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColumnVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
         }
       }
-    } else {
-      if (inputColumnVector.isRepeating) {
-
-        //All must be selected otherwise size would be zero
-        //Repeating property will not change.
-        if (!nullPos[0]) {
-          outputVector[0] = inSet.contains(vector[0]) ? 1 : 0;
-          outNulls[0] = false;
-        } else {
-          outNulls[0] = true;
-        }
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outNulls[i] = nullPos[i];
-          if (!nullPos[i]) {
+          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
            outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
          }
        }
      } else {
-        System.arraycopy(nullPos, 0, outNulls, 0, n);
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
        for(int i = 0; i != n; i++) {
-          if (!nullPos[i]) {
+          if (!inputIsNull[i]) {
            outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
          }
        }
      }
    }
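Reviewer note: the `if (!outputColVector.noNulls) { Arrays.fill(...); outputColVector.noNulls = true; }` block that now appears in every no-NULL branch exists to restore the invariant these expressions depend on: whenever noNulls is true, no isNull entry through batch.size may be true. A standalone sketch of just that maintenance step, with illustrative local variables in place of the ColumnVector fields:

  import java.util.Arrays;

  public class NoNullsInvariantDemo {

    public static void main(String[] args) {
      boolean[] outputIsNull = new boolean[8];
      boolean outputNoNulls = false;      // stale state left by an earlier expression
      outputIsNull[3] = true;             // stale NULL flag from a previous batch

      // Before writing values for a no-NULL input, restore the invariant:
      // noNulls == true must imply that no isNull entry is true.
      if (!outputNoNulls) {
        Arrays.fill(outputIsNull, false);
        outputNoNulls = true;
      }

      for (int i = 0; i < outputIsNull.length; i++) {
        if (outputIsNull[i]) {
          throw new IllegalStateException("invariant violated at " + i);
        }
      }
      System.out.println("noNulls=" + outputNoNulls + " with a clean isNull array");
    }
  }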
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
index 791d8f2..452bd5e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -43,7 +45,7 @@ public DecimalToStringUnaryUDF() {
     inputColumn = -1;
   }
 
-  abstract protected void func(BytesColumnVector outV, DecimalColumnVector inV, int i);
+  abstract protected void func(BytesColumnVector outputColVector, DecimalColumnVector inputColVector, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -52,62 +54,86 @@ public void evaluate(VectorizedRowBatch batch) {
       super.evaluateChildren(batch);
     }
 
-    DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+    DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
-    outV.initBuffer();
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+    outputColVector.initBuffer();
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
       //Nothing to do
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       }
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
index ba83b6a..89e58f1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
@@ -65,8 +65,8 @@ public void evaluate(VectorizedRowBatch batch) {
     DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
     LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    boolean[] nullPos = inputColVector.isNull;
-    boolean[] outNulls = outputColVector.isNull;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     long[] outputVector = outputColVector.vector;
@@ -76,49 +76,69 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
     outputColVector.isRepeating = false;
-    outputColVector.noNulls = inputColVector.noNulls;
-    if (inputColVector.noNulls) {
-      if (inputColVector.isRepeating) {
-        // All must be selected otherwise size would be zero
-        // Repeating property will not change.
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
         }
       }
-    } else {
-      if (inputColVector.isRepeating) {
-
-        // All must be selected otherwise size would be zero
-        // Repeating property will not change.
-        if (!nullPos[0]) {
-          outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
-          outNulls[0] = false;
-        } else {
-          outNulls[0] = true;
-        }
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
-          outNulls[i] = nullPos[i];
-          if (!nullPos[i]) {
+          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
            outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
          }
        }
      } else {
-        System.arraycopy(nullPos, 0, outNulls, 0, n);
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
        for(int i = 0; i != n; i++) {
-          if (!nullPos[i]) {
+          if (!inputIsNull[i]) {
            outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
          }
        }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java
index c8b1dad..b33046e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ public DoubleToStringUnaryUDF() {
     inputColumn = -1;
   }
 
-  abstract protected void func(BytesColumnVector outV, double[] vector, int i);
+  abstract protected void func(BytesColumnVector outputColVector, double[] vector, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -57,59 +59,83 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int n = batch.size;
     double[] vector = inputColVector.vector;
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
-    outV.initBuffer();
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.initBuffer();
+    boolean[] inputIsNull = inputColVector.isNull;
 
     if (n == 0) {
 
       //Nothing to do
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, vector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
     if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, vector, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, vector, i);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, vector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, vector, i);
+          }
        }
-        outV.isRepeating = false;
      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
        for(int i = 0; i != n; i++) {
-          func(outV, vector, i);
+          func(outputColVector, vector, i);
        }
-        outV.isRepeating = false;
      }
-    } else {
+    } else /* there are NULLs in the inputColVector */ {
 
       // Handle case with nulls. Don't do function if the value is null,
       // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0];
-        if (!inputColVector.isNull[0]) {
-          func(outV, vector, 0);
-        }
-      } else if (batch.selectedInUse) {
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
-          outV.isNull[i] = inputColVector.isNull[i];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
          if (!inputColVector.isNull[i]) {
-            func(outV, vector, i);
+            func(outputColVector, vector, i);
          }
        }
-        outV.isRepeating = false;
      } else {
-        System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
        for(int i = 0; i != n; i++) {
          if (!inputColVector.isNull[i]) {
-            func(outV, vector, i);
+            func(outputColVector, vector, i);
          }
        }
-        outV.isRepeating = false;
      }
    }
  }
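Reviewer note: the selectedInUse branches above all iterate through sel[] rather than 0..n, and they now write the isNull flag at each selected index before the value. A small standalone sketch of that iteration, with plain arrays standing in for the ColumnVector fields, shows why noNulls cannot simply be flipped to true on this path:

  import java.util.Arrays;

  public class SelectedInUseDemo {

    public static void main(String[] args) {
      long[] outputVector = new long[8];
      boolean[] outputIsNull = new boolean[8];
      Arrays.fill(outputIsNull, true);    // pretend the column held NULLs before

      int[] sel = {1, 4, 6};              // the only live rows in this batch
      int n = sel.length;

      for (int j = 0; j != n; j++) {
        final int i = sel[j];
        outputIsNull[i] = false;          // set isNull before producing the value
        outputVector[i] = i * 10L;
      }

      // Entries outside sel[] keep their stale flags, so noNulls must stay false
      // unless the whole isNull array is cleared first.
      System.out.println(Arrays.toString(outputVector));
      System.out.println(Arrays.toString(outputIsNull));
    }
  }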
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java
index 1c1bc0b..252a816 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java
@@ -77,73 +77,73 @@ public DynamicValueVectorExpression(int outputColumnNum, TypeInfo typeInfo,
   private void evaluateLong(VectorizedRowBatch vrg) {
     LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum];
     cv.isRepeating = true;
-    cv.noNulls = !isNullValue;
     if (!isNullValue) {
-      cv.vector[0] = longValue;
       cv.isNull[0] = false;
+      cv.vector[0] = longValue;
     } else {
       cv.isNull[0] = true;
+      cv.noNulls = false;
     }
   }
 
   private void evaluateDouble(VectorizedRowBatch vrg) {
     DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum];
     cv.isRepeating = true;
-    cv.noNulls = !isNullValue;
     if (!isNullValue) {
-      cv.vector[0] = doubleValue;
       cv.isNull[0] = false;
+      cv.vector[0] = doubleValue;
     } else {
       cv.isNull[0] = true;
+      cv.noNulls = false;
     }
   }
 
   private void evaluateBytes(VectorizedRowBatch vrg) {
     BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum];
     cv.isRepeating = true;
-    cv.noNulls = !isNullValue;
     cv.initBuffer();
     if (!isNullValue) {
-      cv.setVal(0, bytesValue, 0, bytesValueLength);
       cv.isNull[0] = false;
+      cv.setVal(0, bytesValue, 0, bytesValueLength);
     } else {
       cv.isNull[0] = true;
+      cv.noNulls = false;
     }
   }
 
   private void evaluateDecimal(VectorizedRowBatch vrg) {
     DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum];
     dcv.isRepeating = true;
-    dcv.noNulls = !isNullValue;
     if (!isNullValue) {
-      dcv.vector[0].set(decimalValue);
       dcv.isNull[0] = false;
+      dcv.set(0, decimalValue);
     } else {
       dcv.isNull[0] = true;
+      dcv.noNulls = false;
     }
   }
 
   private void evaluateTimestamp(VectorizedRowBatch vrg) {
     TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumnNum];
     dcv.isRepeating = true;
-    dcv.noNulls = !isNullValue;
     if (!isNullValue) {
-      dcv.set(0, timestampValue);
       dcv.isNull[0] = false;
+      dcv.set(0, timestampValue);
     } else {
       dcv.isNull[0] = true;
+      dcv.noNulls = false;
     }
   }
 
   private void evaluateIntervalDayTime(VectorizedRowBatch vrg) {
     IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum];
     dcv.isRepeating = true;
-    dcv.noNulls = !isNullValue;
     if (!isNullValue) {
-      dcv.set(0, intervalDayTimeValue);
       dcv.isNull[0] = false;
+      dcv.set(0, intervalDayTimeValue);
     } else {
       dcv.isNull[0] = true;
+      dcv.noNulls = false;
     }
   }
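Reviewer note: the DynamicValue rewrite above drops the unconditional `noNulls = !isNullValue` and instead only clears noNulls in the NULL branch, always setting isNull[0] before the value. A minimal standalone sketch of that ordering, with plain fields standing in for the ColumnVector state:

  public class ConstantOutputDemo {

    public static void main(String[] args) {
      long[] vector = new long[1];
      boolean[] isNull = new boolean[1];
      boolean noNulls = true;
      boolean isRepeating;

      boolean isNullValue = false;        // pretend the runtime value arrived non-NULL
      long longValue = 42;

      isRepeating = true;
      if (!isNullValue) {
        isNull[0] = false;                // isNull first, then the value
        vector[0] = longValue;
      } else {
        isNull[0] = true;
        noNulls = false;                  // cleared only in the NULL branch
      }
      System.out.println(vector[0] + ", noNulls=" + noNulls + ", isRepeating=" + isRepeating);
    }
  }

Avoiding `noNulls = !isNullValue` matters because blindly setting noNulls back to true could contradict stale isNull entries elsewhere in a reused column.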
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
index 28d800e..2d8becf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -43,7 +45,7 @@ public FuncDecimalToDouble() {
     inputColumn = -1;
   }
 
-  abstract protected void func(DoubleColumnVector outV, DecimalColumnVector inV, int i);
+  abstract protected void func(DoubleColumnVector outputColVector, DecimalColumnVector inputColVector, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -52,10 +54,13 @@ public void evaluate(VectorizedRowBatch batch) {
       super.evaluateChildren(batch);
     }
 
-    DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+    DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum];
+    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
@@ -63,51 +68,72 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
        }
-        outV.isRepeating = false;
      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
        for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
        }
-        outV.isRepeating = false;
      }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
        for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      }
    }
  }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
index 5fb9778..0ef3da0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -51,7 +53,7 @@ public FuncDecimalToLong() {
     inputColumn = -1;
   }
 
-  abstract protected void func(LongColumnVector outV, DecimalColumnVector inV, int i);
+  abstract protected void func(LongColumnVector outputColVector, DecimalColumnVector inputColVector, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -65,10 +67,13 @@ public void evaluate(VectorizedRowBatch batch) {
       integerPrimitiveCategoryKnown = true;
     }
 
-    DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+    DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
@@ -76,51 +81,72 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
        }
-        outV.isRepeating = false;
      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
        for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
        }
-        outV.isRepeating = false;
      }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
        for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      }
    }
  }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
index f518f39..8324506 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ public FuncDecimalToTimestamp() {
     inputColumn = -1;
   }
 
-  abstract protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i);
+  abstract protected void func(TimestampColumnVector outputColVector, DecimalColumnVector inputColVector, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -53,10 +55,13 @@ public void evaluate(VectorizedRowBatch batch) {
       super.evaluateChildren(batch);
     }
 
-    DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+    DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumnNum];
+    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
@@ -64,51 +69,72 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
        }
-        outV.isRepeating = false;
      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
        for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
        }
-        outV.isRepeating = false;
      }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
        for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      }
    }
  }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
index e632ff9..b67632a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -43,7 +45,7 @@ public FuncDoubleToDecimal() {
     inputColumn = -1;
   }
 
-  abstract protected void func(DecimalColumnVector outV, DoubleColumnVector inV, int i);
+  abstract protected void func(DecimalColumnVector outputColVector, DoubleColumnVector inputColVector, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -52,10 +54,13 @@ public void evaluate(VectorizedRowBatch batch) {
       super.evaluateChildren(batch);
     }
 
-    DoubleColumnVector inV = (DoubleColumnVector) batch.cols[inputColumn];
+    DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
@@ -63,51 +68,72 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
        }
-        outV.isRepeating = false;
      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
        for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
        }
-        outV.isRepeating = false;
      }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
        for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      }
    }
  }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
index d500612..1b8707e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -43,7 +45,7 @@ public FuncLongToDecimal() {
     inputColumn = -1;
   }
 
-  abstract protected void func(DecimalColumnVector outV, LongColumnVector inV, int i);
+  abstract protected void func(DecimalColumnVector outputColVector, LongColumnVector inputColVector, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -52,10 +54,13 @@ public void evaluate(VectorizedRowBatch batch) {
       super.evaluateChildren(batch);
     }
 
-    LongColumnVector inV = (LongColumnVector) batch.cols[inputColumn];
+    LongColumnVector inputColVector = (LongColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
    if (n == 0) {
@@ -63,51 +68,72 @@ public void evaluate(VectorizedRowBatch batch) {
      return;
    }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
        }
-        outV.isRepeating = false;
      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
        for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
        }
-        outV.isRepeating = false;
      }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
        for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
          }
        }
-        outV.isRepeating = false;
      }
    }
  }
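Reviewer note: since the same invariant is being re-established file by file, a tiny post-condition check could guard against regressions. The helper below is hypothetical (not part of this patch or the Hive test suite); a unit test could run it against any output column after evaluate():

  public class BatchInvariantCheck {

    // Hypothetical test helper: fail if a column claims noNulls while an
    // entry within batch.size is flagged NULL.
    static void verify(boolean noNulls, boolean[] isNull, int size) {
      if (noNulls) {
        for (int i = 0; i < size; i++) {
          if (isNull[i]) {
            throw new AssertionError("noNulls=true but isNull[" + i + "]=true");
          }
        }
      }
    }

    public static void main(String[] args) {
      boolean[] isNull = new boolean[4];
      verify(true, isNull, 4);            // passes: all false

      isNull[2] = true;
      verify(false, isNull, 4);           // passes: NULLs are declared

      System.out.println("invariant checks passed");
    }
  }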
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
index f93dbfc..733444e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import java.sql.Timestamp;
+import java.util.Arrays;
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -70,58 +71,83 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int n = batch.size;
     long[] vector = inputColVector.vector;
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
-    outV.initBuffer();
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+    outputColVector.initBuffer();
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
       //Nothing to do
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        prepareResult(0, vector, outputColVector);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
     if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        prepareResult(0, vector, outV);
-      } else if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
-          int i = sel[j];
-          prepareResult(i, vector, outV);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            prepareResult(i, vector, outputColVector);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            prepareResult(i, vector, outputColVector);
+          }
        }
-        outV.isRepeating = false;
      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
        for(int i = 0; i != n; i++) {
-          prepareResult(i, vector, outV);
+          prepareResult(i, vector, outputColVector);
        }
-        outV.isRepeating = false;
      }
-    } else {
-      // Handle case with nulls. Don't do function if the value is null, to save time,
-      // because calling the function can be expensive.
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0];
-        if (!inputColVector.isNull[0]) {
-          prepareResult(0, vector, outV);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
        for(int j=0; j != n; j++) {
          int i = sel[j];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
          if (!inputColVector.isNull[i]) {
-            prepareResult(i, vector, outV);
+            prepareResult(i, vector, outputColVector);
          }
-          outV.isNull[i] = inputColVector.isNull[i];
        }
-        outV.isRepeating = false;
      } else {
        for(int i = 0; i != n; i++) {
+          outputColVector.isNull[i] = inputColVector.isNull[i];
          if (!inputColVector.isNull[i]) {
-            prepareResult(i, vector, outV);
+            prepareResult(i, vector, outputColVector);
          }
-          outV.isNull[i] = inputColVector.isNull[i];
        }
-        outV.isRepeating = false;
      }
    }
  }
@@ -129,7 +155,7 @@ public void evaluate(VectorizedRowBatch batch) {
   /* Evaluate result for position i (using bytes[] to avoid storage allocation costs)
    * and set position i of the output vector to the result.
    */
-  abstract void prepareResult(int i, long[] vector, BytesColumnVector outV);
+  abstract void prepareResult(int i, long[] vector, BytesColumnVector outputColVector);
 
   @Override
   public String vectorExpressionParameters() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java
index 1a94408..aebfa25 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java
@@ -18,6 +18,7 @@
+import java.util.Arrays;
 import java.util.Random;
 
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -55,8 +56,12 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int n = batch.size;
     double[] outputVector = outputColVector.vector;
-    outputColVector.noNulls = true;
     outputColVector.isRepeating = false;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    /*
+     * Do careful maintenance of the outputColVector.noNulls flag.
+     */
 
     // return immediately if batch is empty
     if (n == 0) {
@@ -64,11 +69,30 @@
     }
 
     if (batch.selectedInUse) {
-      for(int j = 0; j != n; j++) {
-        int i = sel[j];
-        outputVector[i] = random.nextDouble();
+
+      // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+      if (!outputColVector.noNulls) {
+        for(int j = 0; j != n; j++) {
+          final int i = sel[j];
+          // Set isNull before call in case it changes its mind.
+          outputIsNull[i] = false;
+          outputVector[i] = random.nextDouble();
+        }
+      } else {
+        for(int j = 0; j != n; j++) {
+          final int i = sel[j];
+          outputVector[i] = random.nextDouble();
+        }
      }
    } else {
+      if (!outputColVector.noNulls) {
+
+        // Assume it is almost always a performance win to fill all of isNull so we can
+        // safely reset noNulls.
+        Arrays.fill(outputIsNull, false);
+        outputColVector.noNulls = true;
+      }
      for(int i = 0; i != n; i++) {
        outputVector[i] = random.nextDouble();
      }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java
index d289dff..f0d7c60 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java
@@ -18,6 +18,7 @@
+import java.util.Arrays;
 import java.util.Random;
 
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -55,8 +56,12 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     int n = batch.size;
     double[] outputVector = outputColVector.vector;
-    outputColVector.noNulls = true;
     outputColVector.isRepeating = false;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    /*
+     * Do careful maintenance of the outputColVector.noNulls flag.
+     */
 
     // return immediately if batch is empty
     if (n == 0) {
@@ -64,11 +69,30 @@
     }
 
     if (batch.selectedInUse) {
-      for(int j = 0; j != n; j++) {
-        int i = sel[j];
-        outputVector[i] = random.nextDouble();
+
+      // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+      if (!outputColVector.noNulls) {
+        for(int j = 0; j != n; j++) {
+          final int i = sel[j];
+          // Set isNull before call in case it changes its mind.
+          outputIsNull[i] = false;
+          outputVector[i] = random.nextDouble();
+        }
+      } else {
+        for(int j = 0; j != n; j++) {
+          final int i = sel[j];
+          outputVector[i] = random.nextDouble();
+        }
      }
    } else {
+      if (!outputColVector.noNulls) {
+
+        // Assume it is almost always a performance win to fill all of isNull so we can
+        // safely reset noNulls.
+        Arrays.fill(outputIsNull, false);
+        outputColVector.noNulls = true;
+      }
      for(int i = 0; i != n; i++) {
        outputVector[i] = random.nextDouble();
      }
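Reviewer note: FuncRand and FuncRandNoSeed are the interesting degenerate case — they never produce NULLs, yet they can no longer just assert `noNulls = true`, because a reused output column may carry stale NULL flags. A standalone sketch of the unselected path, with plain arrays standing in for the ColumnVector fields:

  import java.util.Arrays;
  import java.util.Random;

  public class NoNullProducerDemo {

    public static void main(String[] args) {
      final int n = 8;
      double[] outputVector = new double[n];
      boolean[] outputIsNull = new boolean[n];
      boolean outputNoNulls = false;      // stale state from a previous batch
      outputIsNull[5] = true;
      Random random = new Random(0);

      // A producer that never emits NULLs must still clear stale flags before
      // it may claim noNulls again.
      if (!outputNoNulls) {
        Arrays.fill(outputIsNull, false);
        outputNoNulls = true;
      }
      for (int i = 0; i != n; i++) {
        outputVector[i] = random.nextDouble();
      }
      System.out.println("noNulls=" + outputNoNulls);
    }
  }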
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -106,7 +130,6 @@ public void evaluate(VectorizedRowBatch batch) { round(i, vector[i], decimalPlaces, outputColVector); } } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java index d474ff0..1b5c07a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -50,60 +52,84 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputCol]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputCol]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputCol]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { //Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - func(outV, inV, 0); - } else if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { - for (int i = 0; i != n; i++) { - func(outV, inV, i); + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; } - outV.isRepeating = false; - } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. 
- outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); + for(int i = 0; i != n; i++) { + func(outputColVector, inputColVector, i); } - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - func(outV, inV, i); + outputColVector.isNull[i] = inputColVector.isNull[i]; + if (!inputColVector.isNull[i]) { + func(outputColVector, inputColVector, i); } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for (int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + func(outputColVector, inputColVector, i); } } - outV.isRepeating = false; } } } @@ -111,7 +137,7 @@ public void evaluate(VectorizedRowBatch batch) { /* Evaluate result for position i (using bytes[] to avoid storage allocation costs) * and set position i of the output vector to the result. */ - protected abstract void func(LongColumnVector outV, BytesColumnVector inV, int i); + protected abstract void func(LongColumnVector outputColVector, BytesColumnVector inputColVector, int i); public int getOutputCol() { return outputCol; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java index 93cf1ec..2213b83 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -44,7 +46,7 @@ public FuncTimestampToDecimal() { inputColumn = -1; } - abstract protected void func(DecimalColumnVector outV, TimestampColumnVector inV, int i); + abstract protected void func(DecimalColumnVector outputColVector, TimestampColumnVector inputColVector, int i); @Override public void evaluate(VectorizedRowBatch batch) { @@ -53,10 +55,12 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - TimestampColumnVector inV = (TimestampColumnVector) batch.cols[inputColumn]; + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -64,51 +68,72 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - func(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
index 93cf1ec..2213b83 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ public FuncTimestampToDecimal() {
     inputColumn = -1;
   }

-  abstract protected void func(DecimalColumnVector outV, TimestampColumnVector inV, int i);
+  abstract protected void func(DecimalColumnVector outputColVector, TimestampColumnVector inputColVector, int i);

   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -53,10 +55,12 @@ public void evaluate(VectorizedRowBatch batch) {
       super.evaluateChildren(batch);
     }

-    TimestampColumnVector inV = (TimestampColumnVector) batch.cols[inputColumn];
+    TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;

     if (n == 0) {
@@ -64,51 +68,72 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }

-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       }
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
index 9eb4312..060070e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
@@ -45,7 +47,7 @@ public FuncTimestampToLong() {
     inputColumn = -1;
   }

-  abstract protected void func(LongColumnVector outV, TimestampColumnVector inV, int i);
+  abstract protected void func(LongColumnVector outputColVector, TimestampColumnVector inputColVector, int i);

   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -54,10 +56,12 @@ public void evaluate(VectorizedRowBatch batch) {
       super.evaluateChildren(batch);
     }

-    TimestampColumnVector inV = (TimestampColumnVector) batch.cols[inputColumn];
+    TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;

     if (n == 0) {
@@ -65,51 +69,72 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }

-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       }
     }
   }
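The `if (!outputColVector.noNulls)` guards above exist because scratch output columns are reused from batch to batch: an earlier batch may have left stale true entries in isNull. When noNulls is already true, the invariant guarantees the flag array is clean and the bulk fill can be skipped entirely. A sketch of the hazard, with illustrative stand-in arrays rather than Hive types:

    import java.util.Arrays;

    class ScratchColumnReuseSketch {

      // Called before a loop that will write a value into every row of a reused column.
      static void prepare(boolean[] isNull, boolean noNulls) {
        if (!noNulls) {
          // A previous batch may have left isNull[k] == true; without this fill,
          // a row the next batch writes a perfectly good value into could still read as NULL.
          Arrays.fill(isNull, false);
        }
        // From here on, noNulls may safely be set to true.
      }
    }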
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
index f9b3f76..bf2c9a4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
@@ -64,39 +64,138 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }

-    arg2ColVector.flatten(batch.selectedInUse, sel, n);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;

+    /*
+     * Repeating IF expression?
+     */
     if (arg1ColVector.isRepeating) {
-      if (!null1[0] && vector1[0] == 1) {
-        outputColVector.setElement(0, 0, arg2ColVector);
+      if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) {
+        arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       } else {
+        outputColVector.isRepeating = true;
         outputColVector.noNulls = false;
         isNull[0] = true;
       }
       return;
     }
-    if (batch.selectedInUse) {
-      for (int j = 0; j < n; j++) {
-        int i = sel[j];
-        if (!null1[0] && vector1[i] == 1) {
-          outputColVector.setElement(i, i, arg2ColVector);
+
+    if (arg1ColVector.noNulls) {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
+      /*
+       * Repeating THEN expression?
+       */
+      if (arg2ColVector.isRepeating) {
+        if (batch.selectedInUse) {
+          for (int j = 0; j < n; j++) {
+            int i = sel[j];
+            if (vector1[i] == 1) {
+              isNull[i] = false;
+              // Assign repeated value (index 0) over and over.
+              outputColVector.setElement(i, 0, arg2ColVector);
+            } else {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        } else {
+          for (int i = 0; i < n; i++) {
+            if (vector1[i] == 1) {
+              isNull[i] = false;
+              // Assign repeated value (index 0) over and over.
+              outputColVector.setElement(i, 0, arg2ColVector);
+            } else {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      } else {
+        if (batch.selectedInUse) {
+          for (int j = 0; j < n; j++) {
+            int i = sel[j];
+            if (vector1[i] == 1) {
+              isNull[i] = false;
+              outputColVector.setElement(i, i, arg2ColVector);
+            } else {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
         } else {
-          outputColVector.noNulls = false;
-          isNull[i] = true;
+          for (int i = 0; i < n; i++) {
+            if (vector1[i] == 1) {
+              isNull[i] = false;
+              outputColVector.setElement(i, i, arg2ColVector);
+            } else {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
         }
       }
-    } else {
-      for (int i = 0; i < n; i++) {
-        if (!null1[0] && vector1[i] == 1) {
-          outputColVector.setElement(i, i, arg2ColVector);
+    } else /* there are nulls in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
+      /*
+       * Repeating THEN expression?
+       */
+      if (arg2ColVector.isRepeating) {
+        if (batch.selectedInUse) {
+          for (int j = 0; j < n; j++) {
+            int i = sel[j];
+            if (!null1[i] && vector1[i] == 1) {
+              isNull[i] = false;
+              outputColVector.setElement(i, 0, arg2ColVector);
+            } else {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
         } else {
-          outputColVector.noNulls = false;
-          isNull[i] = true;
+          for (int i = 0; i < n; i++) {
+            if (!null1[i] && vector1[i] == 1) {
+              isNull[i] = false;
+              outputColVector.setElement(i, 0, arg2ColVector);
+            } else {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      } else {
+        if (batch.selectedInUse) {
+          for (int j = 0; j < n; j++) {
+            int i = sel[j];
+            if (!null1[i] && vector1[i] == 1) {
+              isNull[i] = false;
+              outputColVector.setElement(i, i, arg2ColVector);
+            } else {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        } else {
+          for (int i = 0; i < n; i++) {
+            if (!null1[i] && vector1[i] == 1) {
+              isNull[i] = false;
+              outputColVector.setElement(i, i, arg2ColVector);
+            } else {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
         }
       }
     }
-
-    arg2ColVector.unFlatten();
   }

   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
index e7d4e4d..4296692 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
@@ -65,8 +65,7 @@ public void evaluate(VectorizedRowBatch batch) {
     DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
-    outputColVector.isRepeating = false; // may override later
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;
     double[] vector2 = arg2ColVector.vector;
@@ -78,6 +77,9 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }

+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     /* All the code paths below propagate nulls even if neither arg2 nor arg3
      * have nulls. This is to reduce the number of code paths and shorten the
      * code, at the expense of maybe doing unnecessary work if neither input
@@ -85,7 +87,7 @@ public void evaluate(VectorizedRowBatch batch) {
      * of code paths.
      */
     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       } else {
         arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -98,6 +100,15 @@ public void evaluate(VectorizedRowBatch batch) {
     arg3ColVector.flatten(batch.selectedInUse, sel, n);

     if (arg1ColVector.noNulls) {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
@@ -112,7 +123,16 @@ public void evaluate(VectorizedRowBatch batch) {
               arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
         }
       }
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
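The guard rewritten throughout these IF expressions — `vector1[0] == 1` becoming `(arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1` — encodes SQL's treatment of a NULL predicate: IF(NULL, a, b) must yield b, so a repeating NULL condition may no longer take the THEN path merely because the long value under it happens to be 1. A tiny, self-contained illustration (hypothetical helper, not Hive API):

    class IfNullConditionDemo {

      // Mirrors the repeating-condition guard used above.
      static boolean takeThenBranch(boolean noNulls, boolean isNull0, long value0) {
        return (noNulls || !isNull0) && value0 == 1;
      }

      public static void main(String[] args) {
        System.out.println(takeThenBranch(true, false, 1));   // true:  condition is TRUE
        System.out.println(takeThenBranch(false, true, 1));   // false: condition is NULL, take ELSE
        System.out.println(takeThenBranch(true, false, 0));   // false: condition is FALSE, take ELSE
      }
    }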
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
index fa7b2da..099a319 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
@@ -65,8 +65,7 @@ public void evaluate(VectorizedRowBatch batch) {
     IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
-    outputColVector.isRepeating = false; // may override later
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -75,6 +74,9 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }

+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     /* All the code paths below propagate nulls even if neither arg2 nor arg3
      * have nulls. This is to reduce the number of code paths and shorten the
      * code, at the expense of maybe doing unnecessary work if neither input
@@ -82,7 +84,7 @@ public void evaluate(VectorizedRowBatch batch) {
      * of code paths.
      */
     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       } else {
         arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -95,21 +97,39 @@ public void evaluate(VectorizedRowBatch batch) {
     arg3ColVector.flatten(batch.selectedInUse, sel, n);

     if (arg1ColVector.noNulls) {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i));
           outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+          outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i));
         }
       } else {
         for(int i = 0; i != n; i++) {
-          outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i));
           outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+          outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i));
         }
       }
-    } else /* there are nulls */ {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
index 487fb97..905ffba 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.util.Arrays;
+
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector;
@@ -67,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) {
     IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -78,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) {
     }

     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       } else {
         outputColVector.fill(arg3Scalar);
@@ -94,14 +98,25 @@ public void evaluate(VectorizedRowBatch batch) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+          outputIsNull[i] = false;
           outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar);
         }
       } else {
+        Arrays.fill(outputIsNull, 0, n, false);
         for(int i = 0; i != n; i++) {
           outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar);
         }
       }
     } else /* there are nulls */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
index 7b18cf8..e99754b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.util.Arrays;
+
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector;
@@ -67,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) {
     IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -78,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) {
     }

     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         outputColVector.fill(arg2Scalar);
       } else {
         arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -94,16 +98,47 @@ public void evaluate(VectorizedRowBatch batch) {

     if (arg1ColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputColVector.set(
+                i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputColVector.set(
+                i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
+          outputColVector.set(
+              i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
         }
       }
     } else /* there are nulls */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
index 0ba6722..5875d48 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
@@ -68,8 +68,10 @@ public void evaluate(VectorizedRowBatch batch) {
     IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = false; // output is a scalar which we know is non null
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -79,23 +81,54 @@ public void evaluate(VectorizedRowBatch batch) {
     }

     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         outputColVector.fill(arg2Scalar);
       } else {
         outputColVector.fill(arg3Scalar);
       }
-    } else if (arg1ColVector.noNulls) {
+      return;
+    }
+
+    if (arg1ColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
         }
       }
     } else /* there are nulls */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
index 0c8a2f6..d8ec895 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
@@ -64,8 +64,10 @@ public void evaluate(VectorizedRowBatch batch) {
     LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;
     long[] vector2 = arg2ColVector.vector;
@@ -96,6 +98,9 @@ public void evaluate(VectorizedRowBatch batch) {
     arg2ColVector.flatten(batch.selectedInUse, sel, n);
     arg3ColVector.flatten(batch.selectedInUse, sel, n);

+    // Carefully handle NULLs...
+    outputColVector.noNulls = false;
+
     if (arg1ColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
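The column-column IF expressions call flatten() before their row loops so that arg2/arg3 can be indexed positionally even when one of them is repeating; a repeating vector keeps a single value in slot 0, and flattening smears that value (and its null flag) across the first n slots. A simplified sketch of the idea — illustrative only, not the Hive implementation of ColumnVector.flatten():

    import java.util.Arrays;

    class FlattenSketch {

      // Expand a repeating value and its null flag so loops can use row indexes freely.
      static void flatten(long[] vector, boolean[] isNull, boolean isRepeating, int n) {
        if (isRepeating) {
          Arrays.fill(vector, 0, n, vector[0]);
          Arrays.fill(isNull, 0, n, isNull[0]);
        }
      }
    }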
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
index 85c37f9..4afdce4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
@@ -64,39 +64,132 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }

-    arg2ColVector.flatten(batch.selectedInUse, sel, n);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;

+    /*
+     * Repeating IF expression?
+     */
     if (arg1ColVector.isRepeating) {
-      if (!null1[0] && vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) {
+        outputColVector.isRepeating = true;
         outputColVector.noNulls = false;
         isNull[0] = true;
       } else {
-        outputColVector.setElement(0, 0, arg2ColVector);
+        arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       }
       return;
     }
-    if (batch.selectedInUse) {
-      for (int j = 0; j < n; j++) {
-        int i = sel[j];
-        if (!null1[0] && vector1[i] == 1) {
-          outputColVector.noNulls = false;
-          isNull[i] = true;
+
+    /*
+     * Do careful maintenance of the outputColVector.noNulls flag.
+     */
+
+    if (arg1ColVector.noNulls) {
+
+      /*
+       * Repeating ELSE expression?
+       */
+      if (arg2ColVector.isRepeating) {
+        if (batch.selectedInUse) {
+          for (int j = 0; j < n; j++) {
+            int i = sel[j];
+            if (vector1[i] == 1) {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            } else {
+              isNull[i] = false;
+              outputColVector.setElement(i, 0, arg2ColVector);
+            }
+          }
         } else {
-          outputColVector.setElement(i, i, arg2ColVector);
+          for (int i = 0; i < n; i++) {
+            if (vector1[i] == 1) {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            } else {
+              isNull[i] = false;
+              outputColVector.setElement(i, 0, arg2ColVector);
+            }
+          }
+        }
+      } else {
+        if (batch.selectedInUse) {
+          for (int j = 0; j < n; j++) {
+            int i = sel[j];
+            if (vector1[i] == 1) {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            } else {
+              isNull[i] = false;
+              outputColVector.setElement(i, i, arg2ColVector);
+            }
+          }
+        } else {
+          for (int i = 0; i < n; i++) {
+            if (vector1[i] == 1) {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            } else {
+              isNull[i] = false;
+              outputColVector.setElement(i, i, arg2ColVector);
+            }
+          }
         }
       }
     } else {
-      for (int i = 0; i < n; i++) {
-        if (!null1[0] && vector1[i] == 1) {
-          outputColVector.noNulls = false;
-          isNull[i] = true;
+
+      /*
+       * Repeating ELSE expression?
+       */
+      if (arg2ColVector.isRepeating) {
+        if (batch.selectedInUse) {
+          for (int j = 0; j < n; j++) {
+            int i = sel[j];
+            if (!null1[i] && vector1[i] == 1) {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            } else {
+              isNull[i] = false;
+              outputColVector.setElement(i, 0, arg2ColVector);
+            }
+          }
         } else {
-          outputColVector.setElement(i, i, arg2ColVector);
+          for (int i = 0; i < n; i++) {
+            if (!null1[i] && vector1[i] == 1) {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            } else {
+              isNull[i] = false;
+              outputColVector.setElement(i, 0, arg2ColVector);
+            }
+          }
+        }
+      } else {
+        if (batch.selectedInUse) {
+          for (int j = 0; j < n; j++) {
+            int i = sel[j];
+            if (!null1[i] && vector1[i] == 1) {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            } else {
+              isNull[i] = false;
+              outputColVector.setElement(i, i, arg2ColVector);
+            }
+          }
+        } else {
+          for (int i = 0; i < n; i++) {
+            if (!null1[i] && vector1[i] == 1) {
+              isNull[i] = true;
+              outputColVector.noNulls = false;
+            } else {
+              isNull[i] = false;
+              outputColVector.setElement(i, i, arg2ColVector);
+            }
+          }
         }
       }
     }
-
-    arg2ColVector.unFlatten();
   }

   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java
new file mode 100644
index 0000000..5a68cec
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+public class IfExprNullNull extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  public IfExprNullNull(int outputColumnNum) {
+    super(outputColumnNum);
+  }
+
+  public IfExprNullNull() {
+    super();
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    final ColumnVector outputColVector = batch.cols[outputColumnNum];
+
+    // We do not need to do a column reset since we are carefully changing the output.
+
+    outputColVector.isNull[0] = true;
+    outputColVector.noNulls = false;
+    outputColVector.isRepeating = true;
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "null, null";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    throw new UnsupportedOperationException("Undefined descriptor");
+  }
+}
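The new IfExprNullNull covers the degenerate IF(cond, NULL, NULL): every row is NULL no matter what the condition evaluates to, so the cheapest correct answer is a single repeating NULL. A usage-shaped sketch of the observable effect — the batch setup here is illustrative and assumes the constructors shown:

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprNullNull;

    class IfExprNullNullDemo {
      public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        batch.cols[1] = new LongColumnVector();
        batch.size = 100;

        new IfExprNullNull(1).evaluate(batch);  // column 1 is the output column

        // The whole column is now one repeating NULL:
        System.out.println(batch.cols[1].isRepeating);  // true
        System.out.println(batch.cols[1].noNulls);      // false
        System.out.println(batch.cols[1].isNull[0]);    // true
      }
    }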
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
index 09aa9ab..bb57e4e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
@@ -67,8 +67,10 @@ public void evaluate(VectorizedRowBatch batch) {
     BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -86,7 +88,7 @@ public void evaluate(VectorizedRowBatch batch) {
      * of code paths.
      */
     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       } else {
         arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -98,6 +100,11 @@ public void evaluate(VectorizedRowBatch batch) {
     arg2ColVector.flatten(batch.selectedInUse, sel, n);
     arg3ColVector.flatten(batch.selectedInUse, sel, n);

+    /*
+     * Do careful maintenance of NULLs.
+     */
+    outputColVector.noNulls = false;
+
     if (arg1ColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -134,6 +141,7 @@ public void evaluate(VectorizedRowBatch batch) {
         }
       }
     } else /* there are nulls */ {
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
index 9167178..998448a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
@@ -69,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) {
     BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg2ColVector.noNulls;
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -88,7 +90,7 @@ public void evaluate(VectorizedRowBatch batch) {
      * of code paths.
      */
     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       } else {
         outputColVector.fill(arg3Scalar);
@@ -99,7 +101,14 @@ public void evaluate(VectorizedRowBatch batch) {
     // extend any repeating values and noNulls indicator in the inputs
     arg2ColVector.flatten(batch.selectedInUse, sel, n);

+    /*
+     * Do careful maintenance of NULLs.
+     */
+    outputColVector.noNulls = false;
+
     if (arg1ColVector.noNulls) {
+
+      // FUTURE: We could check arg2ColVector.noNulls and optimize these loops.
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
index 84d0052..c597a34 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
@@ -70,8 +70,15 @@ public void evaluate(VectorizedRowBatch batch) {
     BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg3ColVector.noNulls;
-    outputColVector.isRepeating = false; // may override later
+
+    if (!outputColVector.noNulls) {
+      // TEMPORARILY:
+      outputColVector.reset();
+    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -89,7 +96,7 @@ public void evaluate(VectorizedRowBatch batch) {
      * of code paths.
      */
     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         outputColVector.fill(arg2Scalar);
       } else {
         arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -100,6 +107,11 @@ public void evaluate(VectorizedRowBatch batch) {
     // extend any repeating values and noNulls indicator in the input
     arg3ColVector.flatten(batch.selectedInUse, sel, n);

+    /*
+     * Do careful maintenance of NULLs.
+     */
+    outputColVector.noNulls = false;
+
     if (arg1ColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
index 5ed457b..9c0e7be 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

 import java.nio.charset.StandardCharsets;
+import java.util.Arrays;

 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -67,8 +68,11 @@ public void evaluate(VectorizedRowBatch batch) {
     LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
     BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    outputColVector.noNulls = true; // output must be a scalar and neither one is null
-    outputColVector.isRepeating = false; // may override later
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -80,11 +84,12 @@ public void evaluate(VectorizedRowBatch batch) {
     outputColVector.initBuffer();

     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
-        outputColVector.fill(arg2Scalar);
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+        outputColVector.setRef(0, arg2Scalar, 0, arg2Scalar.length);
       } else {
-        outputColVector.fill(arg3Scalar);
+        outputColVector.setRef(0, arg3Scalar, 0, arg3Scalar.length);
       }
+      outputColVector.isRepeating = true;
       return;
     }

@@ -92,6 +97,7 @@ public void evaluate(VectorizedRowBatch batch) {
     if (batch.selectedInUse) {
       for(int j = 0; j != n; j++) {
         int i = sel[j];
+        outputIsNull[i] = false;
         if (vector1[i] == 1) {
           outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length);
         } else {
@@ -99,6 +105,7 @@ public void evaluate(VectorizedRowBatch batch) {
         }
       }
     } else {
+      Arrays.fill(outputIsNull, 0, n, false);
       for(int i = 0; i != n; i++) {
         if (vector1[i] == 1) {
           outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length);
@@ -111,6 +118,7 @@ public void evaluate(VectorizedRowBatch batch) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+          outputIsNull[i] = false;
           if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
             outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length);
           } else {
@@ -118,6 +126,7 @@ public void evaluate(VectorizedRowBatch batch) {
           }
         }
       } else {
+        Arrays.fill(outputIsNull, 0, n, false);
         for(int i = 0; i != n; i++) {
           if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
             outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length);
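When both branches are known non-NULL scalars, the result can only be NULL if nothing is written at all, so the loops above simply clear isNull for every row they touch: per-row stores when a selection vector is in use, one bulk fill of the first n entries otherwise. The recurring helper shape, sketched with plain arrays (illustrative, not Hive API):

    import java.util.Arrays;

    class ClearIsNullSketch {

      static void clearTouchedRows(boolean[] outputIsNull, int[] sel, boolean selectedInUse, int n) {
        if (selectedInUse) {
          for (int j = 0; j != n; j++) {
            outputIsNull[sel[j]] = false;   // only the selected rows are written
          }
        } else {
          Arrays.fill(outputIsNull, 0, n, false);
        }
      }
    }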
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
index ee3cd19..ed21ce9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
@@ -64,8 +64,10 @@ public void evaluate(VectorizedRowBatch batch) {
     TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -81,7 +83,7 @@ public void evaluate(VectorizedRowBatch batch) {
      * of code paths.
      */
     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       } else {
         arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -93,6 +95,11 @@ public void evaluate(VectorizedRowBatch batch) {
     arg2ColVector.flatten(batch.selectedInUse, sel, n);
     arg3ColVector.flatten(batch.selectedInUse, sel, n);

+    /*
+     * Do careful maintenance of NULLs.
+     */
+    outputColVector.noNulls = false;
+
     if (arg1ColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
index b98ddbe..c0cb2c1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
@@ -19,13 +19,12 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

 import java.sql.Timestamp;
+import java.util.Arrays;

 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;

 /**
  * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
@@ -70,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) {
     TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -81,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) {
     }

     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       } else {
         outputColVector.fill(arg3Scalar);
@@ -93,13 +94,19 @@ public void evaluate(VectorizedRowBatch batch) {
     // reduce the number of code paths needed below.
     arg2ColVector.flatten(batch.selectedInUse, sel, n);

+    /*
+     * Since we always set a value, make sure all isNull entries are set to false.
+     */
+
     if (arg1ColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+          outputIsNull[i] = false;
           outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3Scalar);
         }
       } else {
+        Arrays.fill(outputIsNull, 0, n, false);
         for(int i = 0; i != n; i++) {
           outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3Scalar);
         }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
index abd585d..0798f1f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

 import java.sql.Timestamp;
+import java.util.Arrays;

 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -69,8 +70,10 @@ public void evaluate(VectorizedRowBatch batch) {
     TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -80,7 +83,7 @@ public void evaluate(VectorizedRowBatch batch) {
     }

     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         outputColVector.fill(arg2Scalar);
       } else {
         arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -96,16 +99,44 @@ public void evaluate(VectorizedRowBatch batch) {

     if (arg1ColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputColVector.set(
+                i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputColVector.set(
+                i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
+          outputColVector.set(
+              i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
         }
       }
     } else /* there are nulls */ {
+
+      /*
+       * Do careful maintenance of NULLs.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
index 24299e9..0059c58 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
@@ -68,8 +68,10 @@ public void evaluate(VectorizedRowBatch batch) {
     TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = false; // output is a scalar which we know is non null
-    outputColVector.isRepeating = false; // may override later
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     int n = batch.size;
     long[] vector1 = arg1ColVector.vector;

@@ -79,18 +81,44 @@ public void evaluate(VectorizedRowBatch batch) {
     }

     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
         outputColVector.fill(arg2Scalar);
       } else {
         outputColVector.fill(arg3Scalar);
       }
-    } else if (arg1ColVector.noNulls) {
+      return;
+    }
+
+    /*
+     * Since we always set a value, make sure all isNull entries are set to false.
+     */
+
+    if (arg1ColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
         }
@@ -99,16 +127,16 @@ public void evaluate(VectorizedRowBatch batch) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+          outputIsNull[i] = false;
           outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ?
               arg2Scalar : arg3Scalar);
-          outputIsNull[i] = false;
         }
       } else {
+        Arrays.fill(outputIsNull, 0, n, false);
         for(int i = 0; i != n; i++) {
           outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ?
               arg2Scalar : arg3Scalar);
         }
-        Arrays.fill(outputIsNull, 0, n, false);
       }
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
index 6b141d1..a5cddc6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -52,35 +54,45 @@ public void evaluate(VectorizedRowBatch batch) {

     ColumnVector inputColVector = batch.cols[colNum];
     int[] sel = batch.selected;
-    boolean[] nullPos = inputColVector.isNull;
+    boolean[] inputIsNull = inputColVector.isNull;
     int n = batch.size;
-    long[] outputVector = ((LongColumnVector) batch.cols[outputColumnNum]).vector;
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+    long[] outputVector = outputColVector.vector;
+    boolean[] outputIsNull = outputColVector.isNull;

     if (n <= 0) {
       // Nothing to do
       return;
     }

-    // output never has nulls for this operator
-    batch.cols[outputColumnNum].noNulls = true;
-    if (inputColVector.noNulls) {
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.noNulls) {
+      outputColVector.isRepeating = true;
+      outputIsNull[0] = false;
       outputVector[0] = 1;
-      batch.cols[outputColumnNum].isRepeating = true;
     } else if (inputColVector.isRepeating) {
-      // All must be selected otherwise size would be zero
-      // Selection property will not change.
-      outputVector[0] = nullPos[0] ? 0 : 1;
-      batch.cols[outputColumnNum].isRepeating = true;
+      outputColVector.isRepeating = true;
+      outputIsNull[0] = false;
+      outputVector[0] = inputIsNull[0] ? 0 : 1;
     } else {
-      batch.cols[outputColumnNum].isRepeating = false;
+
+      /*
+       * Since we have a result for all rows, we don't need to do conditional NULL maintenance or
+       * turn off noNulls.
+       */
+
       if (batch.selectedInUse) {
         for (int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = nullPos[i] ? 0 : 1;
+          outputIsNull[i] = false;
+          outputVector[i] = inputIsNull[i] ? 0 : 1;
         }
       } else {
+        Arrays.fill(outputIsNull, 0, n, false);
         for (int i = 0; i != n; i++) {
-          outputVector[i] = nullPos[i] ? 0 : 1;
+          outputVector[i] = inputIsNull[i] ? 0 : 1;
         }
       }
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
index 7347800..17d567f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -52,34 +54,47 @@ public void evaluate(VectorizedRowBatch batch) {

     ColumnVector inputColVector = batch.cols[colNum];
     int[] sel = batch.selected;
-    boolean[] nullPos = inputColVector.isNull;
+    boolean[] inputIsNull = inputColVector.isNull;
     int n = batch.size;
-    long[] outputVector = ((LongColumnVector) batch.cols[outputColumnNum]).vector;
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+    long[] outputVector = outputColVector.vector;
+    boolean[] outputIsNull = outputColVector.isNull;
+
     if (n <= 0) {
       // Nothing to do, this is EOF
       return;
     }

-    // output never has nulls for this operator
-    batch.cols[outputColumnNum].noNulls = true;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     if (inputColVector.noNulls) {
+      outputColVector.isRepeating = true;
+      outputIsNull[0] = false;
       outputVector[0] = 0;
-      batch.cols[outputColumnNum].isRepeating = true;
     } else if (inputColVector.isRepeating) {
-      outputVector[0] = nullPos[0] ? 1 : 0;
-      batch.cols[outputColumnNum].isRepeating = true;
+      outputColVector.isRepeating = true;
+      outputIsNull[0] = false;
+      outputVector[0] = inputIsNull[0] ? 1 : 0;
     } else {
+
+      /*
+       * Since we have a result for all rows, we don't need to do conditional NULL maintenance or
+       * turn off noNulls.
+       */
+
       if (batch.selectedInUse) {
         for (int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = nullPos[i] ? 1 : 0;
+          outputIsNull[i] = false;
+          outputVector[i] = inputIsNull[i] ? 1 : 0;
         }
       } else {
+        Arrays.fill(outputIsNull, 0, n, false);
         for (int i = 0; i != n; i++) {
-          outputVector[i] = nullPos[i] ? 1 : 0;
+          outputVector[i] = inputIsNull[i] ? 1 : 0;
         }
       }
-      batch.cols[outputColumnNum].isRepeating = false;
     }
   }
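IsNull and IsNotNull are defined for every row — including NULL ones — so their outputs never contain NULLs and the rewritten loops can clear outputIsNull unconditionally; for a NULL-free input they even collapse to a single repeating 0 or 1. A usage-shaped sketch, assuming the two-argument constructor and the public fields shown above:

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull;

    class IsNullDemo {
      public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        LongColumnVector in = new LongColumnVector();
        in.vector[0] = 42;
        in.isNull[1] = true;   // row 1 is NULL
        in.noNulls = false;
        batch.cols[0] = in;
        batch.cols[1] = new LongColumnVector();
        batch.size = 2;

        new IsNull(0, 1).evaluate(batch);

        LongColumnVector out = (LongColumnVector) batch.cols[1];
        System.out.println(out.vector[0]);  // 0: row 0 is not NULL
        System.out.println(out.vector[1]);  // 1: row 1 is NULL
        System.out.println(out.noNulls);    // true: the answer itself is never NULL
      }
    }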
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
index dfe3bd1..9d22a3c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
@@ -56,7 +56,9 @@ public void evaluate(VectorizedRowBatch batch) {
     LongColumnVector indexColumnVector = (LongColumnVector) batch.cols[indexColumnNum];
     long[] indexV = indexColumnVector.vector;

-    outV.noNulls = true;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outV.isRepeating = false;
+
     if (listV.isRepeating) {
       if (listV.isNull[0]) {
         outV.isNull[0] = true;
@@ -68,8 +70,8 @@ public void evaluate(VectorizedRowBatch batch) {
           outV.isNull[0] = true;
           outV.noNulls = false;
         } else {
-          outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV);
           outV.isNull[0] = false;
+          outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV);
         }
         outV.isRepeating = true;
       } else {
@@ -79,11 +81,11 @@ public void evaluate(VectorizedRowBatch batch) {
             outV.isNull[j] = true;
             outV.noNulls = false;
           } else {
-            outV.setElement(j, (int) (listV.offsets[0] + indexV[j]), childV);
             outV.isNull[j] = false;
+            outV.setElement(j, (int) (listV.offsets[0] + indexV[j]), childV);
+
           }
         }
-        outV.isRepeating = false;
       }
     }
   } else {
@@ -93,11 +95,10 @@ public void evaluate(VectorizedRowBatch batch) {
           outV.isNull[j] = true;
           outV.noNulls = false;
         } else {
-          outV.setElement(j, (int) (listV.offsets[j] + indexV[j]), childV);
           outV.isNull[j] = false;
+          outV.setElement(j, (int) (listV.offsets[j] + indexV[j]), childV);
         }
       }
-      outV.isRepeating = false;
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
index 62860df..948652a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
@@ -53,7 +53,10 @@ public void evaluate(VectorizedRowBatch batch) {
     ListColumnVector listV = (ListColumnVector) batch.cols[listColumnNum];
     ColumnVector childV = listV.child;

-    outV.noNulls = true;
+    /*
+     * Do careful maintenance of the outputColVector.noNulls flag.
+     */
+
     if (listV.isRepeating) {
       if (listV.isNull[0]) {
         outV.isNull[0] = true;
@@ -63,8 +66,8 @@ public void evaluate(VectorizedRowBatch batch) {
           outV.isNull[0] = true;
           outV.noNulls = false;
         } else {
-          outV.setElement(0, (int) (listV.offsets[0] + index), childV);
           outV.isNull[0] = false;
+          outV.setElement(0, (int) (listV.offsets[0] + index), childV);
         }
       }
       outV.isRepeating = true;
@@ -75,8 +78,8 @@ public void evaluate(VectorizedRowBatch batch) {
           outV.isNull[j] = true;
           outV.noNulls = false;
         } else {
-          outV.setElement(j, (int) (listV.offsets[j] + index), childV);
           outV.isNull[j] = false;
+          outV.setElement(j, (int) (listV.offsets[j] + index), childV);
         }
       }
       outV.isRepeating = false;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
index c2f7143..42483c0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
@@ -69,9 +69,9 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }

-    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
-
-    // Handle nulls first
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
     NullUtil.propagateNullsColCol(
         inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
index 0991bda..67d1e76 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -60,8 +62,6 @@ public void evaluate(VectorizedRowBatch batch) {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -71,27 +71,69 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }

+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     if (value == 0) {
       // Denominator is zero, convert the batch to nulls
       outputColVector.noNulls = false;
       outputColVector.isRepeating = true;
       outputIsNull[0] = true;
+      NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+      return;
     } else if (inputColVector.isRepeating) {
-      outputVector[0] = vector[0] / (double) value;
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector.noNulls) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = vector[0] / (double) value;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+      return;
+    }
+
+    if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = vector[i] / (double) value;
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputVector[i] = vector[i] / (double) value;
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] = vector[i] / (double) value;
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] = vector[i] / (double) value;
         }
       }
     } else /* there are nulls */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java index 2d66cee..608c32a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. */ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. 
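The "data values must be 1 for long and NaN for double" convention in the comment above deserves one concrete illustration. The sketch below condenses what a NullUtil-style scrub of NULL rows looks like; the helper name and shape are illustrative, not copied from NullUtil. Each expression normalizes the data slots of its own NULL output rows, so a downstream vectorized expression such as col2 / (col1 - 1) divides by 1 on rows where col1 is NULL instead of tripping over leftover garbage that could make the denominator 0.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public final class NullDataConventionSketch {

  // Illustrative sketch: after nulls are propagated, overwrite the data in
  // NULL rows with the benign value 1 so later arithmetic stays safe.
  static void scrubNullRows(LongColumnVector v, boolean selectedInUse, int[] sel, int n) {
    if (v.noNulls) {
      return;                          // nothing to scrub
    }
    if (v.isRepeating) {
      if (v.isNull[0]) {
        v.vector[0] = 1L;              // entry 0 stands for the whole batch
      }
    } else if (selectedInUse) {
      for (int j = 0; j != n; j++) {
        final int i = sel[j];
        if (v.isNull[i]) {
          v.vector[i] = 1L;
        }
      }
    } else {
      for (int i = 0; i != n; i++) {
        if (v.isNull[i]) {
          v.vector[i] = 1L;
        }
      }
    }
  }
}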
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java index 242fddc..1a82e8e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -25,8 +27,8 @@ public class LongColEqualLongScalar extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -45,6 +47,12 @@ public LongColEqualLongScalar() { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -52,55 +60,75 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] == value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector[0] == value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] == value ? 1 : 0; + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls.
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] == value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] == value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java index dc1a331..eb040ca 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. */ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. 
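The scalar kernels on either side of this point replace ternaries such as vector[i] >= value ? 1 : 0 with branchless sign-bit arithmetic so the inner loops carry no conditional branches and can be auto-vectorized. All six forms read the sign bit of a subtraction and are valid as long as the subtraction does not overflow (for the equality pair, additionally a - b != Long.MIN_VALUE). A standalone sanity harness, separate from the patch:

public class BranchlessCompareCheck {
  public static void main(String[] args) {
    long[][] pairs = { {3, 5}, {5, 5}, {7, 5}, {-4, 0} };
    for (long[] p : pairs) {
      final long a = p[0], b = p[1];
      final long lt = (a - b) >>> 63;                   // a <  b
      final long le = ((b - a) >>> 63) ^ 1;             // a <= b
      final long gt = (b - a) >>> 63;                   // a >  b
      final long ge = ((a - b) >>> 63) ^ 1;             // a >= b
      final long eq = (((a - b) ^ (b - a)) >>> 63) ^ 1; // a == b
      final long ne = ((a - b) ^ (b - a)) >>> 63;       // a != b
      System.out.printf("a=%2d b=%2d lt=%d le=%d gt=%d ge=%d eq=%d ne=%d%n",
          a, b, lt, le, gt, ge, eq, ne);
    }
  }
}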
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java index 633015e..3f0ece7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long value; + protected int colNum; + protected long value; public LongColGreaterEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] >= value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] >= value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] >= value ? 
1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] >= value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java index e56d800..9ab9e1e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. */ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. 
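The col-col kernels above and below now delegate all flag management to NullUtil.propagateNullsColCol, including the isRepeating computation each of them used to inline. Its contract, reconstructed here from the deleted inline logic and the call sites (a sketch, not a copy of NullUtil):

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

public final class PropagateNullsContractSketch {

  // An output row is NULL iff either input row is NULL; a repeating NULL
  // input forces a repeating NULL output; otherwise isRepeating survives
  // only when both inputs repeat.
  static void propagate(ColumnVector a, ColumnVector b, ColumnVector out,
      int[] sel, int n, boolean selectedInUse) {
    out.noNulls = a.noNulls && b.noNulls;
    final boolean aRepNull = a.isRepeating && !a.noNulls && a.isNull[0];
    final boolean bRepNull = b.isRepeating && !b.noNulls && b.isNull[0];
    if ((a.isRepeating && b.isRepeating) || aRepNull || bRepNull) {
      out.isRepeating = true;
      out.isNull[0] = aRepNull || bRepNull;
      return;
    }
    out.isRepeating = false;
    if (out.noNulls) {
      // Honor the invariant: noNulls == true must not coexist with stale
      // true entries anywhere in isNull.
      Arrays.fill(out.isNull, false);
      return;
    }
    if (selectedInUse) {
      for (int j = 0; j != n; j++) {
        final int i = sel[j];
        out.isNull[i] = rowIsNull(a, i) || rowIsNull(b, i);
      }
    } else {
      for (int i = 0; i != n; i++) {
        out.isNull[i] = rowIsNull(a, i) || rowIsNull(b, i);
      }
    }
  }

  private static boolean rowIsNull(ColumnVector v, int i) {
    // A repeating vector only has a meaningful flag in entry 0.
    return !v.noNulls && v.isNull[v.isRepeating ? 0 : i];
  }
}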
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java index 25c07df..4d34707 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -25,8 +27,8 @@ public class LongColGreaterLongScalar extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColGreaterLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -52,8 +54,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -63,44 +65,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] > value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] > value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = (value - vector[i]) >>> 63; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = (value - vector[i]) >>> 63; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a > b" is "(b - a) >>> 63" outputVector[i] = (value - vector[i]) >>> 63; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
- if (!nullPos[0]) { - outputVector[0] = vector[0] > value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] > value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = (value - vector[i]) >>> 63; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (value - vector[i]) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java index f052675..004bf4f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. */ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. 
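To make the new repeating fast path concrete: with a repeating input, row 0 stands for the entire column, so the kernel decides NULL-ness and the value once, stamps isRepeating on the output, and returns without touching rows 1..n-1. A minimal harness (illustrative only, not a test from this patch; it assumes these classes on the classpath) drives one of the scalar comparisons above with a repeating NULL input:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColEqualLongScalar;

public final class RepeatingNullDemo {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    LongColumnVector in = new LongColumnVector();
    LongColumnVector out = new LongColumnVector();
    batch.cols[0] = in;
    batch.cols[1] = out;
    batch.size = 100;

    in.isRepeating = true;  // the whole column is one logical value...
    in.noNulls = false;
    in.isNull[0] = true;    // ...and that value is NULL

    new LongColEqualLongScalar(0, 42L, 1).evaluate(batch);

    System.out.println(out.isRepeating); // expected: true
    System.out.println(out.noNulls);     // expected: false
    System.out.println(out.isNull[0]);   // expected: true
  }
}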
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java index 1e5b349..b50bdd3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long value; + protected int colNum; + protected long value; public LongColLessEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] <= value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] <= value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] <= value ? 
1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] <= value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java index fe700c3..3a3425b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java @@ -66,12 +66,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java index 2f282a9..f32de24 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColLessLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] < value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] < value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = (vector[i] - value) >>> 63; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = (vector[i] - value) >>> 63; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a < b" is "(a - b) >>> 63" outputVector[i] = (vector[i] - value) >>> 63; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] < value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] < value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = (vector[i] - value) >>> 63; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (vector[i] - value) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java index 19fc3a6..cfd61a9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java @@ -71,12 +71,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. 
+ */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java index 8307e78..833b8fa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java @@ -66,17 +66,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. This is to improve speed by avoiding * conditional checks in the inner loop. */ @@ -117,9 +114,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. 
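The if (!outputColVector.noNulls) guards in the no-NULL branches exist because output columns are typically scratch columns recycled from batch to batch, so a kernel may inherit stale flags left by whatever ran in the previous batch. The fill-then-restore order is the point; a condensed sketch (method name is illustrative):

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public final class ScratchColumnReuseSketch {

  // The reused output column may still hold the previous batch's flags
  // (noNulls == false plus stray isNull[k] == true entries). When the current
  // batch produces no NULLs, scrub the whole array before re-asserting the
  // noNulls invariant.
  static void restoreNoNulls(LongColumnVector outputColVector) {
    if (!outputColVector.noNulls) {
      Arrays.fill(outputColVector.isNull, false); // clear every stale flag first...
      outputColVector.noNulls = true;             // ...only then is noNulls safe to set
    }
  }
  // The sel[]-driven branches visit only the selected rows, so they cannot
  // afford the full fill; they leave noNulls == false and clear isNull row by
  // row, which is what the CONSIDER comments allude to.
}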
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java index 0e78f8d..b4c68fa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColNotEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] != value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] != value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
- if (!nullPos[0]) { - outputVector[0] = vector[0] != value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] != value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java index 6c5bb68..181aeae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java @@ -64,8 +64,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -75,49 +75,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls.
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java index 7cdce0b..df78433 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -60,8 +62,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -71,23 +71,51 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + boolean hasDivBy0 = false; if (inputColVector.isRepeating) { - long denom = vector[0]; - outputVector[0] = value / denom; - hasDivBy0 = hasDivBy0 || (denom == 0); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + long denom = vector[0]; + outputVector[0] = value / denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - long denom = vector[i]; - outputVector[i] = value / denom; - hasDivBy0 = hasDivBy0 || (denom == 0); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + long denom = vector[i]; + outputVector[i] = value / denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + long denom = vector[i]; + outputVector[i] = value / denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { long denom = vector[i]; outputVector[i] = value / denom; @@ -95,6 +123,10 @@ public void evaluate(VectorizedRowBatch batch) { } } } else /* there are nulls */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -104,12 +136,12 @@ public void evaluate(VectorizedRowBatch batch) { outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { long denom = vector[i]; outputVector[i] = value / denom; hasDivBy0 = hasDivBy0 || (denom == 0); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java index 8d915c2..2ca74c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] == value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value == vector[0] ? 
1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value == vector[i] ? 1 : 0; + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value == vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = value == vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java index a06fb08..ac245f6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -25,8 +27,8 @@ public class LongScalarGreaterEqualLongColumn extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarGreaterEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -52,8 +54,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -63,44 +65,72 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value >= vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value >= vector[i] ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value >= vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = value >= vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java index 6610288..f8cd8a9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long value; + protected int colNum; + protected long value; public LongScalarGreaterLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,72 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value > vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value > vector[i] ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { // The SIMD optimized form of "a > b" is "(b - a) >>> 63" outputVector[i] = (vector[i] - value) >>> 63; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value > vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = value > vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = (vector[i] - value) >>> 63; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (vector[i] - value) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java index 7a305d3..7b3d4b3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarLessEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,72 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value <= vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value <= vector[i] ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value <= vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = value <= vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java index 763dfdf..948f812 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarLessLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,72 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value < vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value < vector[i] ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { // The SIMD optimized form of "a < b" is "(a - b) >>> 63" outputVector[i] = (value - vector[i]) >>> 63; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value < vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = value < vector[i] ? 
1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = (value - vector[i]) >>> 63;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = (value - vector[i]) >>> 63;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
index aecaed2..1191b31 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongScalarNotEqualLongColumn(long value, int colNum, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,72 @@ public void evaluate(VectorizedRowBatch batch) {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = value != vector[0] ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = value != vector[i] ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
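The not-equal variant that follows uses a slightly different identity: when a != b and neither subtraction overflows, (a - b) and (b - a) have opposite sign bits, so their XOR has its sign bit set; when a == b both differences are zero. Like the ordering forms above, it is exact only while |a - b| < 2^63, because Long.MIN_VALUE is its own negation. A small illustration of both the identity and its boundary (plain Java, not from the patch):

public class NotEqualSignBit {
  static long ne(long a, long b) {
    return ((a - b) ^ (b - a)) >>> 63;  // 1 iff a != b, absent overflow
  }

  public static void main(String[] args) {
    System.out.println(ne(3, 7));  // 1: values differ
    System.out.println(ne(5, 5));  // 0: values equal
    // Boundary case: a - b overflows to Long.MIN_VALUE, whose negation
    // is itself, so the XOR is zero and the identity breaks down.
    System.out.println(ne(Long.MIN_VALUE, 0)); // 0, although the values differ
  }
}

The surrounding engine must ensure operand ranges keep the subtraction safe; that guard is outside this sketch.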
+ outputIsNull[i] = false; + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value != vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = value != vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java index c52e337..0976f20 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -44,7 +46,7 @@ public LongToStringUnaryUDF() { inputColumn = -1; } - abstract protected void func(BytesColumnVector outV, long[] vector, int i); + abstract protected void func(BytesColumnVector outputColVector, long[] vector, int i); @Override public void evaluate(VectorizedRowBatch batch) { @@ -57,59 +59,87 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; long[] vector = inputColVector.vector; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; - outV.initBuffer(); + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; + outputColVector.initBuffer(); if (n == 0) { //Nothing to do return; } + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. 
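LongToStringUnaryUDF is a template: evaluate() owns batching, the selection vector, and the null bookkeeping, while subclasses implement only func() for a single element. A stripped-down sketch of that division of labor (class and method names here are illustrative, not Hive's):

// Illustrative template-method sketch: the base class owns iteration and
// null bookkeeping, the subclass transforms one element at a time,
// mirroring how evaluate() drives func().
abstract class UnaryLongToStringSketch {
  abstract void func(StringBuilder out, long[] vector, int i);

  final String[] evaluate(long[] vector, boolean[] isNull, int n) {
    String[] result = new String[n];
    for (int i = 0; i < n; i++) {
      if (isNull != null && isNull[i]) {
        continue; // leave null slots alone; the data may be undefined
      }
      StringBuilder sb = new StringBuilder();
      func(sb, vector, i);
      result[i] = sb.toString();
    }
    return result;
  }
}

class BooleanToStringSketch extends UnaryLongToStringSketch {
  @Override
  void func(StringBuilder out, long[] vector, int i) {
    out.append(vector[i] == 0 ? "FALSE" : "TRUE");
  }

  public static void main(String[] args) {
    String[] out = new BooleanToStringSketch().evaluate(new long[] {0, 1, 1}, null, 3);
    System.out.println(java.util.Arrays.toString(out)); // [FALSE, TRUE, TRUE]
  }
}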
+ outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - func(outV, vector, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, vector, i); + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, vector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, vector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - func(outV, vector, i); + func(outputColVector, vector, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... // Handle case with nulls. Don't do function if the value is null, // because the data may be undefined for a null value. - outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, vector, 0); - } - } else if (batch.selectedInUse) { + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inputColVector.isNull[i]; + outputColVector.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { - func(outV, vector, i); + func(outputColVector, vector, i); } } - outV.isRepeating = false; + outputColVector.isRepeating = false; } else { - System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { - func(outV, vector, i); + func(outputColVector, vector, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java index ccc0fcb..aad408f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -64,7 +66,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = 
batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -74,38 +75,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { - outputVector[0] = func(vector[0]); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = func(vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + cleanup(outputColVector, sel, batch.selectedInUse, n); + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = func(vector[i]); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = func(vector[i]); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = func(vector[i]); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
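The early return in the isRepeating branch above is the main structural change in these math functions: a repeating input means the whole batch carries one logical value, so func() runs once on element 0 and the output is simply marked repeating. A simplified model of the effect (the fill below only makes the semantics concrete; the real code sets isRepeating instead of filling):

import java.util.Arrays;
import java.util.function.DoubleUnaryOperator;

final class RepeatingFastPathSketch {
  // One func() call answers the whole batch when the input repeats.
  static double[] evalRepeating(double repeatedValue, int n, DoubleUnaryOperator func) {
    double[] out = new double[n];
    Arrays.fill(out, func.applyAsDouble(repeatedValue));
    return out;
  }

  public static void main(String[] args) {
    double[] out = evalRepeating(2.0, 1024, Math::sqrt);
    System.out.println(out[0] + " repeated " + out.length + " times");
  }
}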
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = func(vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = func(vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } cleanup(outputColVector, sel, batch.selectedInUse, n); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java index 3375a56..dcebc24 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -63,7 +65,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -73,38 +74,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { - outputVector[0] = func(vector[0]); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = func(vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + cleanup(outputColVector, sel, batch.selectedInUse, n); + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = func(vector[i]); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = func(vector[i]); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = func(vector[i]); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = func(vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = func(vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } cleanup(outputColVector, sel, batch.selectedInUse, n); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java index 898cf96..e5b6902 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -62,7 +64,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -72,38 +73,68 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { - outputVector[0] = func(vector[0]); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = func(vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = func(vector[i]); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = func(vector[i]); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = func(vector[i]); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
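The Arrays.fill branches in these rewrites all serve one invariant: if noNulls is true, every entry of isNull must be false, because downstream vector expressions consult noNulls first and skip isNull entirely. Paying for one full fill lets the column safely flip noNulls back to true and take the cheaper loops on later batches. A sketch of the invariant with a hypothetical helper (not a Hive API):

import java.util.Arrays;

final class NoNullsInvariant {
  // Hypothetical helper: restore "noNulls implies all-false isNull"
  // before setting noNulls = true.
  static void clearNulls(boolean[] isNull) {
    Arrays.fill(isNull, false);
  }

  static boolean holds(boolean noNulls, boolean[] isNull) {
    if (!noNulls) {
      return true; // nothing is promised when noNulls is false
    }
    for (boolean b : isNull) {
      if (b) {
        return false; // a stray true entry would corrupt later expressions
      }
    }
    return true;
  }

  public static void main(String[] args) {
    boolean[] isNull = {false, true, false};
    clearNulls(isNull);
    System.out.println(holds(true, isNull)); // true
  }
}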
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = func(vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = func(vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java index 30f20f3..be69f7f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -54,45 +56,61 @@ public void evaluate(VectorizedRowBatch batch) { long[] vector = inputColVector.vector; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; long[] outputVector = outV.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; if (n <= 0) { // Nothing to do, this is EOF return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; // 0 XOR 1 yields 1, 1 XOR 1 yields 0 outputVector[0] = vector[0] ^ 1; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = false; outputVector[i] = vector[i] ^ 1; } - outV.isRepeating = false; } else { + Arrays.fill(outV.isNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector[i] ^ 1; } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outputVector[0] = vector[0] ^ 1; - outV.isNull[0] = inputColVector.isNull[0]; - } else if (batch.selectedInUse) { - outV.isRepeating = false; + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; } } else { - outV.isRepeating = false; for (int i = 0; i != n; i++) { outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java index eaaade6..3c18853 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java @@ -284,25 +284,56 @@ public static void setNullAndDivBy0DataEntriesLong( } /* - * Propagate null values for a two-input operator. + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. */ public static void propagateNullsColCol(ColumnVector inputColVector1, ColumnVector inputColVector2, ColumnVector outputColVector, int[] sel, int n, boolean selectedInUse) { - outputColVector.noNulls = inputColVector1.noNulls && inputColVector2.noNulls; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - if (outputColVector.noNulls) { - // the inputs might not always have isNull initialized for - // inputColVector1.isNull[i] || inputColVector2.isNull[i] to be valid - Arrays.fill(outputColVector.isNull, false); - return; - } + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.isNull[0] = false; + outputColVector.isRepeating = true; + } else { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = false; + } + } else { + Arrays.fill(outputColVector.isNull, 0, n, false); + } + } + } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - if (inputColVector2.isRepeating) { - outputColVector.isNull[0] = inputColVector2.isNull[0]; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (!inputColVector2.isNull[0]) { + outputColVector.isNull[0] = false; + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + } else if (inputColVector2.isRepeating) { + if (!inputColVector2.isNull[0]) { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = false; + } + } else { + Arrays.fill(outputColVector.isNull, 0, n, false); + } + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; // Because every value will be NULL. 
+ } } else { + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -313,9 +344,32 @@ public static void propagateNullsColCol(ColumnVector inputColVector1, } } } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - if (inputColVector1.isRepeating) { - outputColVector.isNull[0] = inputColVector1.isNull[0]; + + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (!inputColVector1.isNull[0]) { + outputColVector.isNull[0] = false; + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + } else if (inputColVector1.isRepeating) { + if (!inputColVector1.isNull[0]) { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = false; + } + } else { + Arrays.fill(outputColVector.isNull, 0, n, false); + } + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; // Because every value will be NULL. + } } else { + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -326,18 +380,23 @@ public static void propagateNullsColCol(ColumnVector inputColVector1, } } } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputColVector.isNull[0] = inputColVector1.isNull[0] || inputColVector2.isNull[0]; - if (outputColVector.isNull[0]) { - outputColVector.isRepeating = true; - return; + if (!inputColVector1.isNull[0] && !inputColVector2.isNull[0]) { + outputColVector.isNull[0] = false; + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; } + outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { + if (inputColVector1.isNull[0]) { outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; // because every value will be NULL - return; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; // Because every value will be NULL. } else { + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -352,9 +411,10 @@ public static void propagateNullsColCol(ColumnVector inputColVector1, } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (inputColVector2.isNull[0]) { outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; // because every value will be NULL - return; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; // Because every value will be NULL. 
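Stripped of the repeating and selected-row fast paths, all of the propagateNullsColCol branches above implement one rule: an output row is null when either input row is null, and noNulls is true only if no output row is null. The core rule as a sketch:

import java.util.Arrays;

final class NullOrSketch {
  // Returns the value the caller would store into noNulls.
  static boolean propagate(boolean[] isNull1, boolean[] isNull2, boolean[] outIsNull, int n) {
    boolean anyNull = false;
    for (int i = 0; i < n; i++) {
      outIsNull[i] = isNull1[i] || isNull2[i];
      anyNull |= outIsNull[i];
    }
    return !anyNull;
  }

  public static void main(String[] args) {
    boolean[] out = new boolean[3];
    boolean noNulls = propagate(new boolean[] {false, true, false},
                                new boolean[] {false, false, true}, out, 3);
    System.out.println(noNulls + " " + Arrays.toString(out)); // false [false, true, true]
  }
}

The many branches in the real method exist only so the common cases (both inputs null-free, or a repeating null side) avoid this per-row loop.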
} else { + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -366,6 +426,7 @@ public static void propagateNullsColCol(ColumnVector inputColVector1, } } } else { // neither side is repeating + outputColVector.noNulls = false; if (selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java index bfd7334..62873e9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -49,33 +51,68 @@ public void evaluate(VectorizedRowBatch batch) { } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int[] sel = batch.selected; int n = batch.size; int [] length = inputColVector.length; - long[] resultLen = outV.vector; + long[] resultLen = outputColVector.vector; if (n == 0) { //Nothing to do return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; resultLen[0] = length[0]; - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - resultLen[i] = length[i]; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + resultLen[i] = length[i]; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + resultLen[i] = length[i]; + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { resultLen[i] = length[i]; } - outV.isRepeating = false; } } else { @@ -83,30 +120,23 @@ public void evaluate(VectorizedRowBatch batch) { * Handle case with nulls. Don't do function if the value is null, to save time, * because calling the function can be expensive. 
*/ - outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - resultLen[0] = length[0]; - } - } else if (batch.selectedInUse) { + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if (!inputColVector.isNull[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { resultLen[i] = length[i]; } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!inputColVector.isNull[i]) { + if (!inputIsNull[i]) { resultLen[i] = length[i]; } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java index 20a0a37..db684c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java @@ -15,10 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; import org.apache.hadoop.hive.ql.exec.vector.expressions.AbstractFilterStringColLikeStringScalar.Checker; @@ -70,42 +71,50 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; long[] outputVector = outV.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; // return immediately if batch is empty if (n == 0) { return; } - outV.noNulls = inputColVector.noNulls; - outV.isRepeating = inputColVector.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0); - outV.isNull[0] = false; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0); outV.isNull[i] = false; + outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0); } } else { + Arrays.fill(outV.isNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0); - outV.isNull[i] = false; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 
1 : 0); - outV.isNull[0] = false; - } else { - outputVector[0] = LongColumnVector.NULL_VALUE; - outV.isNull[0] = true; - } - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos[i]) { @@ -114,6 +123,7 @@ public void evaluate(VectorizedRowBatch batch) { } else { outputVector[i] = LongColumnVector.NULL_VALUE; outV.isNull[i] = true; + outV.noNulls = false; } } } else { @@ -124,11 +134,12 @@ public void evaluate(VectorizedRowBatch batch) { } else { outputVector[i] = LongColumnVector.NULL_VALUE; outV.isNull[i] = true; + outV.noNulls = false; } } } } - } + } private Checker borrowChecker() { FilterStringColLikeStringScalar fil = new FilterStringColLikeStringScalar(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java index c889ac1..eb91321 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java @@ -74,57 +74,82 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; int n = batch.size; byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] len = inputColVector.length; long[] outputVector = outputColVector.vector; + boolean[] outputIsNull = outputColVector.isNull; // return immediately if batch is empty if (n == 0) { return; } - outputColVector.isRepeating = inputColVector.isRepeating; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0; - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { + } else /* there are nulls in the inputColVector */ { - // All must be selected otherwise size would be zero - // Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0; - } - outputColVector.isNull[0] = nullPos[0]; - } else if (batch.selectedInUse) { + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + outputColVector.isNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; } - outputColVector.isNull[i] = nullPos[i]; } } else { - System.arraycopy(nullPos, 0, outputColVector.isNull, 0, n); + System.arraycopy(inputIsNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java index f730c9d..6c92e39 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -56,12 +57,14 @@ public void evaluate(VectorizedRowBatch batch) { } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; int n = batch.size; byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] length = inputColVector.length; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -70,55 +73,79 @@ public void evaluate(VectorizedRowBatch batch) { } // initialize output vector buffer to receive data - outV.initBuffer(); + outputColVector.initBuffer(); + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
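The setConcat calls in this file write the concatenation of the column value and the scalar into the output vector's shared buffer. Semantically it behaves roughly like the sketch below; the real BytesColumnVector appends into a reused buffer and records a per-element (start, length) instead of allocating a new array:

import java.nio.charset.StandardCharsets;

final class ConcatSketch {
  // Semantic sketch of setConcat: left bytes followed by right bytes.
  static byte[] concatBytes(byte[] left, int leftStart, int leftLen,
                            byte[] right, int rightStart, int rightLen) {
    byte[] out = new byte[leftLen + rightLen];
    System.arraycopy(left, leftStart, out, 0, leftLen);
    System.arraycopy(right, rightStart, out, leftLen, rightLen);
    return out;
  }

  public static void main(String[] args) {
    byte[] left = "foo".getBytes(StandardCharsets.UTF_8);
    byte[] right = "bar".getBytes(StandardCharsets.UTF_8);
    byte[] s = concatBytes(left, 0, left.length, right, 0, right.length);
    System.out.println(new String(s, StandardCharsets.UTF_8)); // foobar
  }
}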
+ outputIsNull[0] = false; + outputColVector.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... /* * Handle case with nulls. Don't do function if the value is null, to save time, * because calling the function can be expensive. 
*/ - outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); - } - } else if (batch.selectedInUse) { + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputColVector.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { - outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outputColVector.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { - outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java index cbdcc76..6c40a28 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,6 +59,7 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector inV1 = (BytesColumnVector) batch.cols[colNum1]; BytesColumnVector inV2 = (BytesColumnVector) batch.cols[colNum2]; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; int[] sel = batch.selected; int n = batch.size; byte[][] vector1 = inV1.vector; @@ -81,7 +84,7 @@ public void evaluate(VectorizedRowBatch batch) { if (inV1.noNulls && !inV2.noNulls) { - // propagate nulls + // Carefully handle NULLs... /* We'll assume that there *may* be nulls in the input if !noNulls is true * for an input vector. This is to be more forgiving of errors in loading @@ -89,6 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { * isNull[0] is set if !noNulls and isRepeating are true for the vector. */ outV.noNulls = false; + if (inV2.isRepeating) { if (inV2.isNull[0]) { @@ -321,8 +325,9 @@ public void evaluate(VectorizedRowBatch batch) { } } else { // there are no nulls in either input vector - // propagate null information - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ // perform data operation if (inV1.isRepeating && inV2.isRepeating) { @@ -330,13 +335,16 @@ public void evaluate(VectorizedRowBatch batch) { // All must be selected otherwise size would be zero. Repeating property will not change. 
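In the no-nulls section of the column-column concat below, the repeating-times-repeating case performs one setConcat at element 0 and marks the output repeating; the other three branches exist so a repeating side reads element 0 while a varying side reads element i, with that choice hoisted out of the inner loop. The index rule they all share, as a one-line sketch:

final class RepeatIndexSketch {
  // A repeating input always reads element 0; a varying input reads element i.
  static int index(boolean isRepeating, int i) {
    return isRepeating ? 0 : i;
  }
}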
outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]); outV.isRepeating = true; + outputIsNull[0] = false; } else if (inV1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); } @@ -345,9 +353,11 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); } @@ -356,9 +366,11 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java index 9b9c063..f1fabb7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -53,66 +55,88 @@ public void evaluate(VectorizedRowBatch batch) { } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; int n = batch.size; byte[][] vector = inputColVector.vector; int [] start = inputColVector.start; int [] length = inputColVector.length; - long[] resultLen = outV.vector; + long[] resultLen = outputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { //Nothing to do return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false;
 resultLen[0] = utf8StringLength(vector[0], start[0], length[0]);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+ }
 }
- outV.isRepeating = false;
 } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
 for(int i = 0; i != n; i++) {
 resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
 }
- outV.isRepeating = false;
 }
- } else {
+ } else /* there are nulls in the inputColVector */ {
- /*
- * Handle case with nulls. Don't do function if the value is null, to save time,
- * because calling the function can be expensive.
- */
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- resultLen[0] = utf8StringLength(vector[0], start[0], length[0]);
- }
- } else if (batch.selectedInUse) {
+ // Carefully handle NULLs...
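/*
 * For context: utf8StringLength() counts Unicode code points rather than bytes.
 * In UTF-8 every code point contributes exactly one byte whose top two bits are
 * not 10, so a plausible implementation (a sketch; the actual helper in this
 * class may differ in details) is:
 *
 *   public static long utf8StringLength(byte[] s, int start, int len) {
 *     long resultLength = 0;
 *     for (int i = start; i < start + len; i++) {
 *       // Count every byte that is not a UTF-8 continuation byte (10xxxxxx).
 *       if ((s[i] & 0xC0) != 0x80) {
 *         resultLength++;
 *       }
 *     }
 *     return resultLength;
 *   }
 */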
+ outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputColVector.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; + outputColVector.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outputColVector.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java index 94fbef8..a9f09dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -56,12 +57,14 @@ public void evaluate(VectorizedRowBatch batch) { } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; int n = batch.size; byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] length = inputColVector.length; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -70,55 +73,79 @@ public void evaluate(VectorizedRowBatch batch) { } // initialize output vector buffer to receive data - outV.initBuffer(); + outputColVector.initBuffer(); + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + outputColVector.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
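/*
 * The control flow introduced by this patch has the same shape in every unary
 * expression it touches. A condensed model, for a hypothetical per-row function
 * f(i) writing into outputVector (all names illustrative):
 *
 *   outputColVector.isRepeating = false;
 *
 *   if (inputColVector.isRepeating) {
 *     if (inputColVector.noNulls || !inputIsNull[0]) {
 *       outputIsNull[0] = false;
 *       outputVector[0] = f(0);
 *     } else {
 *       outputIsNull[0] = true;
 *       outputColVector.noNulls = false;
 *     }
 *     outputColVector.isRepeating = true;
 *     return;
 *   }
 *
 *   if (inputColVector.noNulls) {
 *     if (!outputColVector.noNulls) {
 *       Arrays.fill(outputIsNull, false);   // now noNulls can be trusted again
 *       outputColVector.noNulls = true;
 *     }
 *     for (int i = 0; i != n; i++) {
 *       outputVector[i] = f(i);
 *     }
 *   } else {
 *     outputColVector.noNulls = false;
 *     for (int i = 0; i != n; i++) {
 *       outputIsNull[i] = inputIsNull[i];
 *       if (!inputIsNull[i]) {
 *         outputVector[i] = f(i);
 *       }
 *     }
 *   }
 *
 * (The selectedInUse variants do the same work over sel[0..n).) Keeping
 * noNulls consistent with the isNull entries through batch.size is what lets
 * the next expression in the chain trust the flags.
 */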
+ outputIsNull[i] = false; + outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } - outV.isRepeating = false; } - } else { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... /* * Handle case with nulls. Don't do function if the value is null, to save time, * because calling the function can be expensive. */ - outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); - } - } else if (batch.selectedInUse) { + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; if (!inputColVector.isNull[i]) { - outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } - outV.isNull[i] = inputColVector.isNull[i]; + outputColVector.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { - outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } - outV.isNull[i] = inputColVector.isNull[i]; + outputColVector.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java index 5934f6f..7c58838 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.UnsupportedEncodingException; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -125,7 +126,7 @@ public void evaluate(VectorizedRowBatch batch) { } BytesColumnVector inV = (BytesColumnVector) batch.cols[colNum]; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int n = batch.size; @@ -137,82 +138,101 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int[] len = inV.length; int[] start = inV.start; - outV.initBuffer(); + outputColVector.initBuffer(); + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; if (inV.isRepeating) { - outV.isRepeating = true; if (!inV.noNulls && inV.isNull[0]) { - outV.isNull[0] = true; - outV.noNulls = false; - outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); + outputIsNull[0] = true; + outputColVector.noNulls = false; + outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); return; } else { - outV.noNulls = true; + outputIsNull[0] = false; int offset = getSubstrStartOffset(vector[0], start[0], len[0], startIdx); if (offset != -1) { - outV.setVal(0, vector[0], offset, len[0] - (offset - start[0])); + outputColVector.setVal(0, vector[0], offset, len[0] - (offset - start[0])); } else { - outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); + outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } } - } else { - outV.isRepeating = false; - if (batch.selectedInUse) { - if (!inV.noNulls) { - outV.noNulls = false; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; - if (!inV.isNull[selected]) { - int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], - startIdx); - outV.isNull[selected] = false; - if (offset != -1) { - outV.setVal(selected, vector[selected], offset, - len[selected] - (offset - start[selected])); - } else { - outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } else { - outV.isNull[selected] = true; - } - } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; + outputColVector.isRepeating = true; + return; + } + + if (batch.selectedInUse) { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + if (!inV.isNull[selected]) { + outputIsNull[selected] = false; int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], startIdx); + outputColVector.isNull[selected] = false; if (offset != -1) { - outV.setVal(selected, vector[selected], offset, + outputColVector.setVal(selected, vector[selected], offset, len[selected] - (offset - start[selected])); } else { - outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); + outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outputColVector.isNull[selected] = true; + outputColVector.noNulls = false; } } } else { - if (!inV.noNulls) { - outV.noNulls = false; - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - for (int i = 0; i != n; ++i) { - if (!inV.isNull[i]) { - int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); - if (offset != -1) { - outV.setVal(i, vector[i], offset, len[i] - (offset - start[i])); - } else { - outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + outputColVector.isNull[selected] = false; + int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], + startIdx); + if (offset != -1) { + outputColVector.setVal(selected, vector[selected], offset, + len[selected] - (offset - start[selected])); + } else { + outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { + } + } + } else { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
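/*
 * getSubstrStartOffset() returns the byte offset of the requested (1-based,
 * code-point) start position, or -1 when the start falls outside the string;
 * the -1 case is mapped to EMPTY_STRING rather than NULL, matching Hive's
 * substr() semantics. A simplified sketch for a positive startIdx (the real
 * helper also handles negative, count-from-the-end indexes):
 *
 *   static int getSubstrStartOffset(byte[] utf8, int start, int len, int substrStart) {
 *     final int end = start + len;
 *     int codePoints = 0;
 *     for (int i = start; i < end; i++) {
 *       if ((utf8[i] & 0xC0) != 0x80) {          // first byte of a code point
 *         if (++codePoints == substrStart) {
 *           return i;                            // byte offset of that code point
 *         }
 *       }
 *     }
 *     return -1;                                 // start position past the end
 *   }
 */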
+ + for (int i = 0; i != n; ++i) { + if (!inV.isNull[i]) { + outputColVector.isNull[i] = false; int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); if (offset != -1) { - outV.setVal(i, vector[i], offset, len[i] - (offset - start[i])); + outputColVector.setVal(i, vector[i], offset, len[i] - (offset - start[i])); } else { - outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); + outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for (int i = 0; i != n; ++i) { + int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); + if (offset != -1) { + outputColVector.setVal(i, vector[i], offset, len[i] - (offset - start[i])); + } else { + outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java index 9d6eccf..7c5d19a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.UnsupportedEncodingException; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -146,7 +147,7 @@ public void evaluate(VectorizedRowBatch batch) { } BytesColumnVector inV = (BytesColumnVector) batch.cols[colNum]; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int n = batch.size; @@ -158,82 +159,98 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int[] len = inV.length; int[] start = inV.start; - outV.initBuffer(); + outputColVector.initBuffer(); + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; if (inV.isRepeating) { - outV.isRepeating = true; + if (!inV.noNulls && inV.isNull[0]) { - outV.isNull[0] = true; - outV.noNulls = false; - outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); - return; + outputIsNull[0] = true; + outputColVector.noNulls = false; + outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } else { - outV.noNulls = true; + outputIsNull[0] = false; populateSubstrOffsets(vector[0], start[0], len[0], startIdx, length, offsetArray); if (offsetArray[0] != -1) { - outV.setVal(0, vector[0], offsetArray[0], offsetArray[1]); + outputColVector.setVal(0, vector[0], offsetArray[0], offsetArray[1]); } else { - outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); + outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } } - } else { - outV.isRepeating = false; - if (batch.selectedInUse) { - if (!inV.noNulls) { - outV.noNulls = false; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; - if (!inV.isNull[selected]) { - outV.isNull[selected] = false; - populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, - length, offsetArray); - if (offsetArray[0] != -1) { - outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]); - } else { - outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } else { - outV.isNull[selected] = true; - } - } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; - outV.isNull[selected] = false; + outputColVector.isRepeating = true; + return; + } + + if (batch.selectedInUse) { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + if (!inV.isNull[selected]) { + outputIsNull[selected] = false; populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, length, offsetArray); if (offsetArray[0] != -1) { - outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]); + outputColVector.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]); } else { - outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); + outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outputIsNull[selected] = true; + outputColVector.noNulls = false; } } } else { - if (!inV.noNulls) { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - outV.noNulls = false; - for (int i = 0; i != n; ++i) { - if (!inV.isNull[i]) { - populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); - if (offsetArray[0] != -1) { - outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]); - } else { - outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + outputColVector.isNull[selected] = false; + populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, + length, offsetArray); + if (offsetArray[0] != -1) { + outputColVector.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]); + } else { + outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { - outV.isNull[i] = false; + } + } + } else { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ + for (int i = 0; i != n; ++i) { + if (!inV.isNull[i]) { + outputIsNull[i] = false; populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); if (offsetArray[0] != -1) { - outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]); + outputColVector.setVal(i, vector[i], offsetArray[0], offsetArray[1]); } else { - outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); + outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for (int i = 0; i != n; ++i) { + populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); + if (offsetArray[0] != -1) { + outputColVector.setVal(i, vector[i], offsetArray[0], offsetArray[1]); + } else { + outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 544b700..9b7005d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -71,8 +71,10 @@ public void evaluate(VectorizedRowBatch batch) { byte[][] vector = inputColVector.vector; int [] start = inputColVector.start; int [] length = inputColVector.length; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; - outV.initBuffer(); + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.initBuffer(); Text t; if (n == 0) { @@ -86,72 +88,86 @@ public void evaluate(VectorizedRowBatch batch) { // It's implemented in the simplest way now, just calling the // existing built-in function. - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; s.set(vector[0], start[0], length[0]); t = func.evaluate(s); - setString(outV, 0, t); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; + setString(outputColVector, 0, t); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } - /* Fill output isNull with false for selected elements since there is a chance we'll - * convert to noNulls == false in setString(); - */ - outV.isNull[i] = false; - s.set(vector[i], start[i], length[i]); - t = func.evaluate(s); - setString(outV, i, t); + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
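/*
 * Why set isNull[i] = false before the call: the wrapped UDF may itself return
 * NULL, and the setString() helper defined later in this class records that by
 * flipping the flags the other way:
 *
 *   private static void setString(BytesColumnVector outputColVector, int i, Text t) {
 *     if (t == null) {
 *       outputColVector.noNulls = false;
 *       outputColVector.isNull[i] = true;
 *       return;
 *     }
 *     outputColVector.setVal(i, t.getBytes(), 0, t.getLength());
 *   }
 *
 * Pre-clearing the entry and letting the callee override it keeps isNull and
 * noNulls coherent whichever way the call goes.
 */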
+ outputIsNull[i] = false; + s.set(vector[i], start[i], length[i]); + t = func.evaluate(s); + setString(outputColVector, i, t); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + s.set(vector[i], start[i], length[i]); + t = func.evaluate(s); + setString(outputColVector, i, t); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { - // Set all elements to not null. The setString call can override this. - Arrays.fill(outV.isNull, 0, n, false); + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { s.set(vector[i], start[i], length[i]); t = func.evaluate(s); - setString(outV, i, t); + setString(outputColVector, i, t); } - outV.isRepeating = false; } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. - outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; // setString can override this - if (!inputColVector.isNull[0]) { - s.set(vector[0], start[0], length[0]); - t = func.evaluate(s); - setString(outV, 0, t); - } - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inputColVector.isNull[i]; // setString can override this + outputColVector.isNull[i] = inputColVector.isNull[i]; // setString can override this if (!inputColVector.isNull[i]) { s.set(vector[i], start[i], length[i]); t = func.evaluate(s); - setString(outV, i, t); + setString(outputColVector, i, t); } } - outV.isRepeating = false; } else { // setString can override this null propagation - System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { s.set(vector[i], start[i], length[i]); t = func.evaluate(s); - setString(outV, i, t); + setString(outputColVector, i, t); } } - outV.isRepeating = false; } } } @@ -159,13 +175,13 @@ public void evaluate(VectorizedRowBatch batch) { /* Set the output string entry i to the contents of Text object t. * If t is a null object reference, record that the value is a SQL NULL. 
*/ - private static void setString(BytesColumnVector outV, int i, Text t) { + private static void setString(BytesColumnVector outputColVector, int i, Text t) { if (t == null) { - outV.noNulls = false; - outV.isNull[i] = true; + outputColVector.noNulls = false; + outputColVector.isNull[i] = true; return; } - outV.setVal(i, t.getBytes(), 0, t.getLength()); + outputColVector.setVal(i, t.getBytes(), 0, t.getLength()); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java index 2f8b627..9462347 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -44,7 +46,7 @@ public StringUnaryUDFDirect() { inputColumn = -1; } - abstract protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i); + abstract protected void func(BytesColumnVector outputColVector, byte[][] vector, int[] start, int[] length, int i); @Override public void evaluate(VectorizedRowBatch batch) { @@ -59,59 +61,82 @@ public void evaluate(VectorizedRowBatch batch) { byte[][] vector = inputColVector.vector; int start[] = inputColVector.start; int length[] = inputColVector.length; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; - outV.initBuffer(); + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.initBuffer(); if (n == 0) { //Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, vector, start, length, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - func(outV, vector, start, length, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, vector, start, length, i); + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, vector, start, length, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, vector, start, length, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - func(outV, vector, start, length, i); + func(outputColVector, vector, start, length, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, vector, start, length, 0); - } - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inputColVector.isNull[i]; + outputColVector.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { - func(outV, vector, start, length, i); + func(outputColVector, vector, start, length, i); } } - outV.isRepeating = false; } else { - System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { - func(outV, vector, start, length, i); + func(outputColVector, vector, start, length, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java index 7fb95f5..31a0ad1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java @@ -73,8 +73,8 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -83,49 +83,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
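/*
 * The alternative this CONSIDER refers to would hoist the isNull maintenance
 * out of the loop, e.g. (sketch):
 *
 *   Arrays.fill(outputIsNull, 0, n, false);   // or fill the whole array
 *   for (int j = 0; j != n; j++) {
 *     final int i = sel[j];
 *     outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
 *   }
 *
 * trading one bulk fill for a branch-free inner loop. Note that filling only
 * the first n entries is not enough to set noNulls = true when a selection
 * vector is in use, since sel[] may reference entries beyond n.
 */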
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java index 5eb2090..13abfd3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -44,7 +46,7 @@ public TimestampToStringUnaryUDF() { inputColumn = -1; } - abstract protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i); + abstract protected void func(BytesColumnVector outputColVector, TimestampColumnVector inV, int i); @Override public void evaluate(VectorizedRowBatch batch) { @@ -56,59 +58,82 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; - outV.initBuffer(); + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.initBuffer(); if (n == 0) { //Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - func(outV, inputColVector, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inputColVector, i); + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - func(outV, inputColVector, i); + func(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, inputColVector, 0); - } - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inputColVector.isNull[i]; + outputColVector.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { - func(outV, inputColVector, i); + func(outputColVector, inputColVector, i); } } - outV.isRepeating = false; } else { - System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { - func(outV, inputColVector, i); + func(outputColVector, inputColVector, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java index ea78a2e..3a560ca 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java @@ -35,6 +35,10 @@ private final int[] inputColumns; + // The unassigned batchIndex for the rows that have not received a non-NULL value yet. + // A temporary work array. 
+ private transient int[] unassignedBatchIndices; + public VectorCoalesce(int [] inputColumns, int outputColumnNum) { super(outputColumnNum); this.inputColumns = inputColumns; @@ -57,66 +61,174 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; - ColumnVector outputVector = batch.cols[outputColumnNum]; + ColumnVector outputColVector = batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; if (n <= 0) { // Nothing to do return; } - outputVector.init(); + if (unassignedBatchIndices == null || n > unassignedBatchIndices.length) { + + // (Re)allocate larger to be a multiple of 1024 (DEFAULT_SIZE). + final int roundUpSize = + ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) / VectorizedRowBatch.DEFAULT_SIZE) + * VectorizedRowBatch.DEFAULT_SIZE; + unassignedBatchIndices = new int[roundUpSize]; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - boolean noNulls = false; + // CONSIDER: Should be do this for all vector expressions that can + // work on BytesColumnVector output columns??? + outputColVector.init(); + + final int columnCount = inputColumns.length; + + /* + * Process the input columns to find a non-NULL value for each row. + * + * We track the unassigned batchIndex of the rows that have not received + * a non-NULL value yet. Similar to a selected array. + */ + boolean isAllUnassigned = true; + int unassignedColumnCount = 0; for (int k = 0; k < inputColumns.length; k++) { ColumnVector cv = batch.cols[inputColumns[k]]; - // non-nulls in any column qualifies coalesce having no nulls - // common case: last column is a constant & non-null - noNulls = noNulls || cv.noNulls; - } - - outputVector.noNulls = noNulls; - outputVector.isRepeating = false; - - ColumnVector first = batch.cols[inputColumns[0]]; - - if (first.noNulls && first.isRepeating) { - outputVector.isRepeating = true; - outputVector.isNull[0] = false; - outputVector.setElement(0, 0, first); - } else if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector.isNull[i] = true; - for (int k = 0; k < inputColumns.length; k++) { - ColumnVector cv = batch.cols[inputColumns[k]]; - if ( (cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, 0, cv); - break; - } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, i, cv); - break; + if (cv.isRepeating) { + + if (cv.noNulls || !cv.isNull[0]) { + + /* + * With a repeating value we can finish all remaining rows. + */ + if (isAllUnassigned) { + + // No other columns provided non-NULL values. We can return repeated output. + outputIsNull[0] = false; + outputColVector.setElement(0, 0, cv); + outputColVector.isRepeating = true; + return; + } else { + + // Some rows have already been assigned values. Assign the remaining. + // We cannot use copySelected method here. + for (int i = 0; i < unassignedColumnCount; i++) { + final int batchIndex = unassignedBatchIndices[i]; + outputIsNull[batchIndex] = false; + + // Our input is repeating (i.e. inputColNumber = 0). + outputColVector.setElement(batchIndex, 0, cv); + } + return; } + } else { + + // Repeated NULLs -- skip this input column. 
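/*
 * The rewritten COALESCE (this hunk and the remainder below) works like a
 * shrinking worklist. For example, with three input columns and rows 0..3
 * (X = non-NULL):
 *
 *   c1: X    NULL NULL X      pass 1 assigns rows 0 and 3; unassigned = {1, 2}
 *   c2: NULL NULL X    X      pass 2 assigns row 2;        unassigned = {1}
 *   c3: NULL X    X    X      pass 3 assigns row 1;        done, early return
 *
 * Each pass visits only the batch indexes still in unassignedBatchIndices, so
 * later columns are touched only for rows the earlier columns left NULL. The
 * work array itself is sized up in DEFAULT_SIZE (1024) steps; e.g. for
 * n = 1500, roundUpSize = ((1500 + 1023) / 1024) * 1024 = 2048.
 */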
} - } - } else { - for (int i = 0; i != n; i++) { - outputVector.isNull[i] = true; - for (int k = 0; k < inputColumns.length; k++) { - ColumnVector cv = batch.cols[inputColumns[k]]; - if ((cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, 0, cv); - break; - } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, i, cv); - break; + } else { + + /* + * Non-repeating input column. Use any non-NULL values for unassigned rows. + */ + if (isAllUnassigned) { + + /* + * No other columns provided non-NULL values. We *may* be able to finish all rows + * with this input column... + */ + if (cv.noNulls){ + + // Since no NULLs, we can provide values for all rows. + if (batch.selectedInUse) { + for (int i = 0; i < n; i++) { + final int batchIndex = sel[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, cv); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for (int batchIndex = 0; batchIndex < n; batchIndex++) { + outputColVector.setElement(batchIndex, batchIndex, cv); + } + } + return; + } else { + + // We might not be able to assign all rows because of input NULLs. Start tracking any + // unassigned rows. + boolean[] inputIsNull = cv.isNull; + if (batch.selectedInUse) { + for (int i = 0; i < n; i++) { + final int batchIndex = sel[i]; + if (!inputIsNull[batchIndex]) { + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, cv); + } else { + unassignedBatchIndices[unassignedColumnCount++] = batchIndex; + } + } + } else { + for (int batchIndex = 0; batchIndex < n; batchIndex++) { + if (!inputIsNull[batchIndex]) { + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, cv); + } else { + unassignedBatchIndices[unassignedColumnCount++] = batchIndex; + } + } + } + if (unassignedColumnCount == 0) { + return; + } + isAllUnassigned = false; + } + } else { + + /* + * We previously assigned *some* rows with non-NULL values. The batch indices of + * the unassigned row were tracked. + */ + if (cv.noNulls) { + + // Assign all remaining rows. + for (int i = 0; i < unassignedColumnCount; i++) { + final int batchIndex = unassignedBatchIndices[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, cv); + } + return; + } else { + + // Use any non-NULL values found; remember the remaining unassigned. + boolean[] inputIsNull = cv.isNull; + int newUnassignedColumnCount = 0; + for (int i = 0; i < unassignedColumnCount; i++) { + final int batchIndex = unassignedBatchIndices[i]; + if (!inputIsNull[batchIndex]) { + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, cv); + } else { + unassignedBatchIndices[newUnassignedColumnCount++] = batchIndex; + } + } + if (newUnassignedColumnCount == 0) { + return; + } + unassignedColumnCount = newUnassignedColumnCount; } } } } + + // NULL out the remaining columns. 
+ outputColVector.noNulls = false; + for (int i = 0; i < unassignedColumnCount; i++) { + final int batchIndex = unassignedBatchIndices[i]; + outputIsNull[batchIndex] = true; + } } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java index 0dde5bd..a30a7df 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java @@ -24,6 +24,14 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/* + * ELT(index, string, ....) returns the string column/expression value at the specified + * index expression. + * + * The first argument expression indicates the index of the string to be retrieved from + * remaining arguments. We return NULL when the index number is less than 1 or + * index number is greater than the number of the string arguments. + */ public class VectorElt extends VectorExpression { private static final long serialVersionUID = 1L; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index f7fdb57..bd594e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -387,8 +387,35 @@ public Object setValue(Object field, ColumnVector column, int row) throws HiveEx * if the wrong vector column is used. */ private static abstract class VectorExpressionWriterDecimal extends VectorExpressionWriterBase { + @Override public Object writeValue(ColumnVector column, int row) throws HiveException { + if (column instanceof Decimal64ColumnVector) { + Decimal64ColumnVector d64cv = (Decimal64ColumnVector) column; + final long decimal64Long; + if (d64cv.noNulls && !d64cv.isRepeating) { + decimal64Long = d64cv.vector[row]; + } else if (d64cv.noNulls && d64cv.isRepeating) { + decimal64Long = d64cv.vector[0]; + } else if (!d64cv.noNulls && !d64cv.isRepeating && !d64cv.isNull[row]) { + decimal64Long = d64cv.vector[row]; + } else if (!d64cv.noNulls && !d64cv.isRepeating && d64cv.isNull[row]) { + return null; + } else if (!d64cv.noNulls && d64cv.isRepeating && !d64cv.isNull[0]) { + decimal64Long = d64cv.vector[0]; + } else if (!d64cv.noNulls && d64cv.isRepeating && d64cv.isNull[0]) { + return null; + } else { + throw new HiveException( + String.format( + "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", + row, d64cv.noNulls, d64cv.isRepeating, d64cv.isNull[row], d64cv.isNull[0])); + } + + HiveDecimalWritable scratchHiveDecimalWritable = d64cv.getScratchWritable(); + scratchHiveDecimalWritable.deserialize64(decimal64Long, d64cv.scale); + return writeValue(scratchHiveDecimalWritable); + } DecimalColumnVector dcv = (DecimalColumnVector) column; if (dcv.noNulls && !dcv.isRepeating) { return writeValue(dcv.vector[row]); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java index b2891a8..f6e9c8b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java @@ -91,7 +91,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - // Handle null + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse); switch (primitiveCategory) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index e232555..7bb5c54 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -32,6 +32,7 @@ import org.apache.hive.common.util.DateParser; import java.sql.Date; +import java.util.Arrays; public class VectorUDFDateAddColScalar extends VectorExpression { private static final long serialVersionUID = 1L; @@ -77,52 +78,84 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; ColumnVector inputCol = batch.cols[this.colNum]; /* every line below this is identical for evaluateLong & evaluateString */ final int n = inputCol.isRepeating ? 1 : batch.size; int[] sel = batch.selected; final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; + boolean[] outputIsNull = outputColVector.isNull; if(batch.size == 0) { /* n != batch.size when isRepeating */ return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; switch (primitiveCategory) { case DATE: - if (inputCol.noNulls) { - outV.noNulls = true; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.vector[i] = evaluateDate(inputCol, i); + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outputColVector.isNull[0] = false; + outputColVector.vector[0] = evaluateDate(inputCol, 0); + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + } else if (inputCol.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputColVector.vector[i] = evaluateDate(inputCol, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputColVector.vector[i] = evaluateDate(inputCol, i); + } } } else { - for(int i = 0; i < n; i++) { - outV.vector[i] = evaluateDate(inputCol, i); + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + outputColVector.vector[i] = evaluateDate(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. 
Don't do function if the value is null, to save time, // because calling the function can be expensive. - outV.noNulls = false; + outputColVector.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; - outV.isNull[i] = inputCol.isNull[i]; + outputColVector.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { - outV.vector[i] = evaluateDate(inputCol, i); + outputColVector.vector[i] = evaluateDate(inputCol, i); } } } else { for(int i = 0; i < n; i++) { - outV.isNull[i] = inputCol.isNull[i]; + outputColVector.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { - outV.vector[i] = evaluateDate(inputCol, i); + outputColVector.vector[i] = evaluateDate(inputCol, i); } } } @@ -130,35 +163,66 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outputColVector.isNull[0] = false; + outputColVector.vector[0] = evaluateTimestamp(inputCol, 0); + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + } else if (inputCol.noNulls) { if (batch.selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.vector[i] = evaluateTimestamp(inputCol, i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputColVector.vector[i] = evaluateTimestamp(inputCol, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputColVector.vector[i] = evaluateTimestamp(inputCol, i); + } } } else { - for(int i = 0; i < n; i++) { - outV.vector[i] = evaluateTimestamp(inputCol, i); + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + outputColVector.vector[i] = evaluateTimestamp(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
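/*
 * For reference, the per-row work being guarded here is cheap for DATE inputs
 * and more costly for TIMESTAMP and STRING ones. A plausible model of the DATE
 * helper (illustrative; the real methods live elsewhere in this class):
 *
 *   // DATE columns store epoch days directly.
 *   protected long evaluateDate(ColumnVector columnVector, int index) {
 *     LongColumnVector lcv = (LongColumnVector) columnVector;
 *     return lcv.vector[index] + (isPositive ? numDays : -numDays);
 *   }
 *
 * whereas the TIMESTAMP path first converts to days and the STRING path must
 * parse the date text, which is why skipping NULL rows saves real work.
 */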
- outV.noNulls = false; + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; - outV.isNull[i] = inputCol.isNull[i]; + outputColVector.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { - outV.vector[i] = evaluateTimestamp(inputCol, i); + outputColVector.vector[i] = evaluateTimestamp(inputCol, i); } } } else { for(int i = 0; i < n; i++) { - outV.isNull[i] = inputCol.isNull[i]; + outputColVector.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { - outV.vector[i] = evaluateTimestamp(inputCol, i); + outputColVector.vector[i] = evaluateTimestamp(inputCol, i); } } } @@ -168,35 +232,66 @@ public void evaluate(VectorizedRowBatch batch) { case STRING: case CHAR: case VARCHAR: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outputColVector.isNull[0] = false; + evaluateString(inputCol, outputColVector, 0); + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + } else if (inputCol.noNulls) { if (batch.selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - evaluateString(inputCol, outV, i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + evaluateString(inputCol, outputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + evaluateString(inputCol, outputColVector, i); + } } } else { - for(int i = 0; i < n; i++) { - evaluateString(inputCol, outV, i); + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + evaluateString(inputCol, outputColVector, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
- outV.noNulls = false; + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; - outV.isNull[i] = inputCol.isNull[i]; + outputColVector.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { - evaluateString(inputCol, outV, i); + evaluateString(inputCol, outputColVector, i); } } } else { for(int i = 0; i < n; i++) { - outV.isNull[i] = inputCol.isNull[i]; + outputColVector.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { - evaluateString(inputCol, outV, i); + evaluateString(inputCol, outputColVector, i); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 0aaba26..ecde39b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -30,6 +30,7 @@ import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; +import java.util.Arrays; public class VectorUDFDateAddScalarCol extends VectorExpression { @@ -91,7 +92,8 @@ public void evaluate(VectorizedRowBatch batch) { final int n = inputCol.isRepeating ? 1 : batch.size; int[] sel = batch.selected; final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; switch (primitiveCategory) { case DATE: @@ -107,15 +109,15 @@ public void evaluate(VectorizedRowBatch batch) { case VARCHAR: boolean parsed = dateParser.parseDate(new String(stringValue, StandardCharsets.UTF_8), baseDate); if (!parsed) { - outV.noNulls = false; + outputColVector.noNulls = false; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; - outV.isNull[i] = true; + outputColVector.isNull[i] = true; } } else { for(int i = 0; i < n; i++) { - outV.isNull[i] = true; + outputColVector.isNull[i] = true; } } return; @@ -130,39 +132,73 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; long baseDateDays = DateWritable.millisToDays(baseDate.getTime()); + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outputColVector.isNull[0] = false; + evaluate(baseDateDays, inputCol.vector[0], outputColVector, 0); + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputCol.noNulls) { - outV.noNulls = true; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - evaluate(baseDateDays, inputCol.vector[i], outV, i); + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
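/*
 * Note the early-out above when the scalar date string fails to parse: the
 * whole output column becomes NULL. Factored out, the pattern is simply
 * (a sketch, not an existing utility):
 *
 *   static void setBatchNull(ColumnVector outV, int[] sel, boolean selectedInUse, int n) {
 *     outV.noNulls = false;
 *     if (selectedInUse) {
 *       for (int j = 0; j < n; j++) {
 *         outV.isNull[sel[j]] = true;
 *       }
 *     } else {
 *       for (int i = 0; i < n; i++) {
 *         outV.isNull[i] = true;
 *       }
 *     }
 *   }
 */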
+            outputIsNull[i] = false;
+            evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
+          }
         }
       } else {
-        for(int i = 0; i < n; i++) {
-          evaluate(baseDateDays, inputCol.vector[i], outV, i);
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
         }
       }
-    } else {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
       // Handle case with nulls. Don't do function if the value is null, to save time,
       // because calling the function can be expensive.
-      outV.noNulls = false;
+      outputColVector.noNulls = false;
+
       if (selectedInUse) {
         for(int j = 0; j < n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inputCol.isNull[i];
+          outputColVector.isNull[i] = inputCol.isNull[i];
           if (!inputCol.isNull[i]) {
-            evaluate(baseDateDays, inputCol.vector[i], outV, i);
+            evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
           }
         }
       } else {
         for(int i = 0; i < n; i++) {
-          outV.isNull[i] = inputCol.isNull[i];
+          outputColVector.isNull[i] = inputCol.isNull[i];
           if (!inputCol.isNull[i]) {
-            evaluate(baseDateDays, inputCol.vector[i], outV, i);
+            evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
           }
         }
       }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
index 982467e..0d794fe 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
@@ -89,6 +89,9 @@ public void evaluate(VectorizedRowBatch batch) {
       return;
     }
 
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
     NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse);
 
     LongColumnVector convertedVector1 = toDateArray(batch, inputTypeInfos[0], inputColVector1, dateVector1);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
index 97e3669..08c91e2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
@@ -35,6 +35,7 @@
 import java.sql.Timestamp;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.Arrays;
 
 public class VectorUDFDateDiffColScalar extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -80,20 +81,21 @@ public void evaluate(VectorizedRowBatch batch) {
       super.evaluateChildren(batch);
     }
 
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     ColumnVector inputCol = batch.cols[this.colNum];
     /* every line below this is identical for evaluateLong & evaluateString */
     final int n = inputCol.isRepeating ? 1 : batch.size;
     int[] sel = batch.selected;
     final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if(batch.size == 0) {
       /* n != batch.size when isRepeating */
       return;
     }
 
-    /* true for all algebraic UDFs with no state */
-    outV.isRepeating = inputCol.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     PrimitiveCategory primitiveCategory1 =
         ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory();
     switch (primitiveCategory1) {
@@ -114,15 +116,15 @@ public void evaluate(VectorizedRowBatch batch) {
           baseDate = DateWritable.dateToDays(date);
           break;
         } catch (Exception e) {
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
           if (selectedInUse) {
             for(int j=0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           }
           return;
@@ -134,35 +136,66 @@ public void evaluate(VectorizedRowBatch batch) {
     PrimitiveCategory primitiveCategory0 =
         ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory();
     switch (primitiveCategory0) {
       case DATE:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateDate(inputCol, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateDate(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                // Set isNull before call in case it changes its mind.
+                outputIsNull[i] = false;
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
+              }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
+              }
+            }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateDate(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateDate(inputCol, i);
            }
          }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
          // Handle case with nulls. Don't do function if the value is null, to save time,
          // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
          if (selectedInUse) {
            for(int j = 0; j < n; j++) {
              int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
              }
            }
          } else {
            for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
              }
            }
          }
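The fork on the output's own noNulls flag in the selected-in-use branch above deserves a note: scratch columns are reused between expressions, so out.isNull may still hold stale true entries from an earlier user. A hedged illustration of the two loops; kernel() and the local names are placeholders, not the patch's actual code:

    // Scratch-column reuse is why the branch keys on the *output's* noNulls flag:
    // a previous expression may have left stale 'true' entries in out.isNull.
    if (!out.noNulls) {
      for (int j = 0; j != n; j++) {
        final int i = sel[j];
        out.isNull[i] = false;               // clear first; the kernel may set it back
        out.vector[i] = kernel(in.vector[i]);
      }
    } else {
      // Invariant: noNulls == true means no isNull entry through batch.size is set,
      // so the clearing store can be skipped and the loop stays tight.
      for (int j = 0; j != n; j++) {
        final int i = sel[j];
        out.vector[i] = kernel(in.vector[i]);
      }
    }
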
@@ -170,35 +203,66 @@ public void evaluate(VectorizedRowBatch batch) {
         break;
 
       case TIMESTAMP:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateTimestamp(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                // Set isNull before call in case it changes its mind.
+                outputIsNull[i] = false;
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+              }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+              }
+            }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
            }
          }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
          // Handle case with nulls. Don't do function if the value is null, to save time,
          // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
          if (selectedInUse) {
            for(int j = 0; j < n; j++) {
              int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
              }
            }
          } else {
            for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
              }
            }
          }
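In the unselected branch the same stale-null problem is solved in bulk: one Arrays.fill over the whole isNull array lets noNulls be flipped back to true before the compute loop runs. A sketch of the trade-off, with the same placeholder names as above:

    if (!out.noNulls) {
      // One contiguous store over the whole array is usually cheaper than n
      // scattered per-row stores, and it restores the noNulls invariant so the
      // compute loop below needs no per-row isNull bookkeeping at all.
      Arrays.fill(out.isNull, false);
      out.noNulls = true;
    }
    for (int i = 0; i != n; i++) {
      out.vector[i] = kernel(in.vector[i]);
    }

For the selected case the CONSIDER comments leave the same bulk fill open as a future option; wiping 1024 booleans to process a handful of selected rows may not pay off, which is presumably why it stays per-row there for now.
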
@@ -208,35 +272,66 @@ public void evaluate(VectorizedRowBatch batch) {
       case STRING:
       case CHAR:
       case VARCHAR:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              evaluateString(inputCol, outV, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            evaluateString(inputCol, outputColVector, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                // Set isNull before call in case it changes its mind.
+                outputIsNull[i] = false;
+                evaluateString(inputCol, outputColVector, i);
+              }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                evaluateString(inputCol, outputColVector, i);
+              }
+            }
           } else {
-            for(int i = 0; i < n; i++) {
-              evaluateString(inputCol, outV, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              evaluateString(inputCol, outputColVector, i);
            }
          }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
          // Handle case with nulls. Don't do function if the value is null, to save time,
          // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
          if (selectedInUse) {
            for(int j = 0; j < n; j++) {
              int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
              }
            }
          } else {
            for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
              }
            }
          }
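The ColCol variant earlier delegates this bookkeeping to NullUtil.propagateNullsColCol, while the Scalar/Col variants like the one above hand-roll it per type. What the propagate step must guarantee before any compute loop runs is roughly the following; this is a contract sketch under assumed semantics, not the helper's actual implementation, and in1/in2/out/sel are placeholder names:

    // After propagation, out.noNulls and out.isNull must have integrity through
    // batch.size: a row is null iff either input is null at that row. (The real
    // helper also derives out.isRepeating from the inputs.)
    out.noNulls = in1.noNulls && in2.noNulls;
    if (!out.noNulls) {
      for (int j = 0; j != n; j++) {
        final int i = batch.selectedInUse ? sel[j] : j;
        out.isNull[i] =
            (!in1.noNulls && in1.isNull[in1.isRepeating ? 0 : i])
            || (!in2.noNulls && in2.isNull[in2.isRepeating ? 0 : i]);
      }
    }
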
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
index c575c05..c436c96 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
@@ -33,6 +33,7 @@
 import java.sql.Timestamp;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.Arrays;
 
 public class VectorUDFDateDiffScalarCol extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -78,20 +79,21 @@ public void evaluate(VectorizedRowBatch batch) {
       super.evaluateChildren(batch);
     }
 
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     ColumnVector inputCol = batch.cols[this.colNum];
     /* every line below this is identical for evaluateLong & evaluateString */
     final int n = inputCol.isRepeating ? 1 : batch.size;
     int[] sel = batch.selected;
     final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if(batch.size == 0) {
       /* n != batch.size when isRepeating */
       return;
     }
 
-    /* true for all algebraic UDFs with no state */
-    outV.isRepeating = inputCol.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     PrimitiveCategory primitiveCategory0 =
         ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory();
@@ -113,15 +115,15 @@ public void evaluate(VectorizedRowBatch batch) {
           baseDate = DateWritable.dateToDays(date);
           break;
         } catch (Exception e) {
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
           if (selectedInUse) {
             for(int j=0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           }
           return;
@@ -134,35 +136,66 @@ public void evaluate(VectorizedRowBatch batch) {
     PrimitiveCategory primitiveCategory1 =
         ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory();
     switch (primitiveCategory1) {
       case DATE:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateDate(inputCol, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateDate(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                // Set isNull before call in case it changes its mind.
+                outputIsNull[i] = false;
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
+              }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
+              }
+            }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateDate(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateDate(inputCol, i);
             }
           }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
               }
             }
           }
@@ -170,35 +203,66 @@ public void evaluate(VectorizedRowBatch batch) {
         break;
 
       case TIMESTAMP:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateTimestamp(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                // Set isNull before call in case it changes its mind.
+                outputIsNull[i] = false;
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+              }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+              }
+            }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
            }
          }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
          // Handle case with nulls. Don't do function if the value is null, to save time,
          // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
          if (selectedInUse) {
            for(int j = 0; j < n; j++) {
              int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
              }
            }
          } else {
            for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
              }
            }
          }
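The new isRepeating fast path repeats the same two-flag test everywhere, and the aggregate changes further down (VectorUDAFBloomFilter, VectorUDAFBloomFilterMerge, VectorUDAFCountMerge) adopt the identical test. Its point, as a small hedged sketch where process() is a placeholder for the per-value work:

    // For a repeating column only entry 0 matters. After this patch a producer may
    // legitimately hand over a vector with noNulls == false even though row 0 is not
    // null, so both flags must be consulted; checking noNulls alone silently drops
    // whole batches of valid repeated values.
    if (in.isRepeating) {
      final boolean rowIsNull = !in.noNulls && in.isNull[0];
      if (!rowIsNull) {
        process(in, 0);          // one value stands for the whole batch
      }
      return;
    }
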
@@ -208,35 +272,66 @@ public void evaluate(VectorizedRowBatch batch) {
       case STRING:
       case CHAR:
       case VARCHAR:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              evaluateString(inputCol, outV, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            evaluateString(inputCol, outputColVector, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                // Set isNull before call in case it changes its mind.
+                outputIsNull[i] = false;
+                evaluateString(inputCol, outputColVector, i);
+              }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                evaluateString(inputCol, outputColVector, i);
+              }
+            }
           } else {
-            for(int i = 0; i < n; i++) {
-              evaluateString(inputCol, outV, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              evaluateString(inputCol, outputColVector, i);
            }
          }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
          // Handle case with nulls. Don't do function if the value is null, to save time,
          // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
          if (selectedInUse) {
            for(int j = 0; j < n; j++) {
              int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
              }
            }
          } else {
            for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
              }
            }
          }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
index 9d72bdf..1f2d5cb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
@@ -55,7 +55,10 @@ public void evaluate(VectorizedRowBatch batch) {
     // indexColumnVector includes the keys of Map
     indexColumnVector = batch.cols[indexColumnNum];
 
-    outV.noNulls = true;
+    /*
+     * Do careful maintenance of the outputColVector.noNulls flag.
+ */ + int[] mapValueIndex; if (mapV.isRepeating) { if (mapV.isNull[0]) { @@ -71,9 +74,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.noNulls = false; } else { // the key is found in MapColumnVector, set the value - outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); outV.isNull[0] = false; - outV.noNulls = true; + outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); } outV.isRepeating = true; } else { @@ -97,8 +99,8 @@ private void setUnRepeatingOutVector(VectorizedRowBatch batch, MapColumnVector m outV.isNull[j] = true; outV.noNulls = false; } else { - outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); outV.isNull[j] = false; + outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); } } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java index e6a86ae..a7d730b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java @@ -50,7 +50,10 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector outV = batch.cols[outputColumnNum]; MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum]; - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + int[] mapValueIndex; if (mapV.isRepeating) { if (mapV.isNull[0]) { @@ -65,7 +68,6 @@ public void evaluate(VectorizedRowBatch batch) { } else { // the key is found in MapColumnVector, set the value outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); - outV.noNulls = true; } } outV.isRepeating = true; @@ -77,8 +79,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[j] = true; outV.noNulls = false; } else { - outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); outV.isNull[j] = false; + outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); } } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java index 519a4e4..eb6d6dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; import java.util.Calendar; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -82,53 +83,85 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; ColumnVector inputColVec = batch.cols[this.colNum]; /* every line below this is identical for evaluateLong & evaluateString */ final int n = inputColVec.isRepeating ? 
1 : batch.size;
     int[] sel = batch.selected;
     final boolean selectedInUse = (inputColVec.isRepeating == false) && batch.selectedInUse;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if(batch.size == 0) {
       /* n != batch.size when isRepeating */
       return;
     }
 
-    /* true for all algebraic UDFs with no state */
-    outV.isRepeating = inputColVec.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     LongColumnVector longColVector = (LongColumnVector) inputColVec;
 
+    if (inputColVec.isRepeating) {
+      if (inputColVec.noNulls || !inputColVec.isNull[0]) {
+        outputColVector.isNull[0] = false;
+        outputColVector.vector[0] = getDateField(longColVector.vector[0]);
+      } else {
+        outputColVector.isNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
     if (inputColVec.noNulls) {
-      outV.noNulls = true;
-      if (selectedInUse) {
-        for(int j=0; j < n; j++) {
-          int i = sel[j];
-          outV.vector[i] = getDateField(longColVector.vector[i]);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputColVector.vector[i] = getDateField(longColVector.vector[i]);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputColVector.vector[i] = getDateField(longColVector.vector[i]);
+          }
+        }
       } else {
-        for(int i = 0; i < n; i++) {
-          outV.vector[i] = getDateField(longColVector.vector[i]);
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
         }
       }
-    } else {
-      // Handle case with nulls. Don't do function if the value is null, to save time,
-      // because calling the function can be expensive.
-      outV.noNulls = false;
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+ outputColVector.noNulls = false; + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; - outV.isNull[i] = inputColVec.isNull[i]; + outputColVector.isNull[i] = inputColVec.isNull[i]; if (!inputColVec.isNull[i]) { - outV.vector[i] = getDateField(longColVector.vector[i]); + outputColVector.vector[i] = getDateField(longColVector.vector[i]); } } } else { for(int i = 0; i < n; i++) { - outV.isNull[i] = inputColVec.isNull[i]; + outputColVector.isNull[i] = inputColVec.isNull[i]; if (!inputColVec.isNull[i]) { - outV.vector[i] = getDateField(longColVector.vector[i]); + outputColVector.vector[i] = getDateField(longColVector.vector[i]); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java index c5762d1..2918546 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -106,11 +106,27 @@ public void evaluate(VectorizedRowBatch batch) { return; } - // true for all algebraic UDFs with no state - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + try { + outV.isNull[0] = false; + outV.vector[0] = getField(inputCol.vector[0], inputCol.start[0], inputCol.length[0]); + } catch (ParseException e) { + outV.noNulls = false; + outV.isNull[0] = true; + } + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for (int j = 0; j < n; j++) { int i = sel[j]; @@ -133,11 +149,11 @@ public void evaluate(VectorizedRowBatch batch) { } } } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + // Carefully handle NULLs... outV.noNulls = false; + if (selectedInUse) { for (int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java index 54cb5d8..740a00c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; import java.util.Calendar; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -95,27 +96,41 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputColVec.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec; + if (inputColVec.isRepeating) { + if (inputColVec.noNulls || !inputColVec.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = getTimestampField(timestampColVector, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + if (inputColVec.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = getTimestampField(timestampColVector, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = getTimestampField(timestampColVector, i); } } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... outV.noNulls = false; + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java index 6ebd7d3..18bacc5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java @@ -151,7 +151,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation) agg; if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { + if (inputColumn.noNulls || !inputColumn.isNull[0]) { valueProcessor.processValue(myagg, inputColumn, 0); } return; @@ -251,7 +251,11 @@ public void aggregateInputSelection( } } else { if (inputColumn.isRepeating) { - // All nulls, no-op for min/max + if (!inputColumn.isNull[0]) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + inputColumn, batchSize); + } } else { if (batch.selectedInUse) { iterateHasNullsSelectionWithAggregationSelection( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java index 8f1375e..fe5e33a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java @@ -123,7 +123,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation) agg; if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { + if (inputColumn.noNulls || !inputColumn.isNull[0]) { processValue(myagg, inputColumn, 0); } return; @@ -223,7 +223,11 @@ public void aggregateInputSelection( } } else { if (inputColumn.isRepeating) { - // All nulls, no-op for min/max + if (!inputColumn.isNull[0]) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + inputColumn, batchSize); + } } else { if (batch.selectedInUse) { iterateHasNullsSelectionWithAggregationSelection( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java index 
888f5f0..0463de5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java @@ -37,155 +37,155 @@ private static final long serialVersionUID = 1L; - /** - * class for storing the current aggregate value. - */ - static class Aggregation implements AggregationBuffer { - - private static final long serialVersionUID = 1L; + /** + * class for storing the current aggregate value. + */ + static class Aggregation implements AggregationBuffer { - transient private long value; + private static final long serialVersionUID = 1L; - @Override - public int getVariableSize() { - throw new UnsupportedOperationException(); - } + private transient long value; - @Override - public void reset() { - value = 0L; - } + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); } - // This constructor is used to momentarily create the object so match can be called. - public VectorUDAFCountMerge() { - super(); + @Override + public void reset() { + value = 0L; } + } - public VectorUDAFCountMerge(VectorAggregationDesc vecAggrDesc) { - super(vecAggrDesc); - init(); - } + // This constructor is used to momentarily create the object so match can be called. + public VectorUDAFCountMerge() { + super(); + } - private void init() { - } + public VectorUDAFCountMerge(VectorAggregationDesc vecAggrDesc) { + super(vecAggrDesc); + init(); + } - private Aggregation getCurrentAggregationBuffer( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - int row) { - VectorAggregationBufferRow mySet = aggregationBufferSets[row]; - Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); - return myagg; - } + private void init() { + } - @Override - public void aggregateInputSelection( + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex, VectorizedRowBatch batch) throws HiveException { - int batchSize = batch.size; + int batchSize = batch.size; - if (batchSize == 0) { - return; - } + if (batchSize == 0) { + return; + } - inputExpression.evaluate(batch); + inputExpression.evaluate(batch); - LongColumnVector inputVector = - (LongColumnVector) batch.cols[ - this.inputExpression.getOutputColumnNum()]; + LongColumnVector inputVector = + (LongColumnVector) batch.cols[ + this.inputExpression.getOutputColumnNum()]; - long[] vector = inputVector.vector; + long[] vector = inputVector.vector; - if (inputVector.noNulls) { - if (inputVector.isRepeating) { - iterateNoNullsRepeatingWithAggregationSelection( + if (inputVector.noNulls) { + if (inputVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( aggregationBufferSets, aggregateIndex, vector[0], batchSize); - } else { - if (batch.selectedInUse) { - iterateNoNullsSelectionWithAggregationSelection( + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( aggregationBufferSets, aggregateIndex, vector, batch.selected, batchSize); - } else { - iterateNoNullsWithAggregationSelection( + } else { + iterateNoNullsWithAggregationSelection( aggregationBufferSets, 
aggregateIndex, vector, batchSize); - } } - } else { - if (inputVector.isRepeating) { - if (batch.selectedInUse) { - iterateHasNullsRepeatingSelectionWithAggregationSelection( + } + } else { + if (inputVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( aggregationBufferSets, aggregateIndex, vector[0], batchSize, batch.selected, inputVector.isNull); - } else { - iterateHasNullsRepeatingWithAggregationSelection( + } else { + iterateHasNullsRepeatingWithAggregationSelection( aggregationBufferSets, aggregateIndex, vector[0], batchSize, inputVector.isNull); - } - } else { - if (batch.selectedInUse) { - iterateHasNullsSelectionWithAggregationSelection( + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( aggregationBufferSets, aggregateIndex, vector, batchSize, batch.selected, inputVector.isNull); - } else { - iterateHasNullsWithAggregationSelection( + } else { + iterateHasNullsWithAggregationSelection( aggregationBufferSets, aggregateIndex, vector, batchSize, inputVector.isNull); - } } } } + } - private void iterateNoNullsRepeatingWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - long value, - int batchSize) { + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long value, + int batchSize) { - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( aggregationBufferSets, aggregateIndex, i); - myagg.value += value; - } - } - - private void iterateNoNullsSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - long[] values, - int[] selection, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( + myagg.value += value; + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long[] values, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( aggregationBufferSets, aggregateIndex, i); - myagg.value += values[selection[i]]; - } + myagg.value += values[selection[i]]; } + } - private void iterateNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - long[] values, - int batchSize) { - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + long[] values, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( aggregationBufferSets, aggregateIndex, i); - myagg.value += values[i]; - } + myagg.value += values[i]; } + } - private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex, long value, @@ -193,41 +193,41 @@ private void iterateHasNullsRepeatingSelectionWithAggregationSelection( int[] selection, boolean[] isNull) { - if (isNull[0]) { - return; - } - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( 
+ if (isNull[0]) { + return; + } + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( aggregationBufferSets, aggregateIndex, i); - myagg.value += value; - } - + myagg.value += value; } + + } - private void iterateHasNullsRepeatingWithAggregationSelection( + private void iterateHasNullsRepeatingWithAggregationSelection( VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex, long value, int batchSize, boolean[] isNull) { - if (isNull[0]) { - return; - } + if (isNull[0]) { + return; + } - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( aggregationBufferSets, aggregateIndex, i); - myagg.value += value; - } + myagg.value += value; } + } - private void iterateHasNullsSelectionWithAggregationSelection( + private void iterateHasNullsSelectionWithAggregationSelection( VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex, long[] values, @@ -235,146 +235,146 @@ private void iterateHasNullsSelectionWithAggregationSelection( int[] selection, boolean[] isNull) { - for (int j=0; j < batchSize; ++j) { - int i = selection[j]; - if (!isNull[i]) { - Aggregation myagg = getCurrentAggregationBuffer( + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( aggregationBufferSets, aggregateIndex, j); - myagg.value += values[i]; - } + myagg.value += values[i]; } - } + } + } - private void iterateHasNullsWithAggregationSelection( + private void iterateHasNullsWithAggregationSelection( VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex, long[] values, int batchSize, boolean[] isNull) { - for (int i=0; i < batchSize; ++i) { - if (!isNull[i]) { - Aggregation myagg = getCurrentAggregationBuffer( + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( aggregationBufferSets, aggregateIndex, i); - myagg.value += values[i]; - } + myagg.value += values[i]; } - } + } + } - @Override - public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { - inputExpression.evaluate(batch); + inputExpression.evaluate(batch); - LongColumnVector inputVector = - (LongColumnVector) batch.cols[ - this.inputExpression.getOutputColumnNum()]; + LongColumnVector inputVector = + (LongColumnVector) batch.cols[ + this.inputExpression.getOutputColumnNum()]; - int batchSize = batch.size; + int batchSize = batch.size; - if (batchSize == 0) { - return; - } + if (batchSize == 0) { + return; + } - Aggregation myagg = (Aggregation)agg; + Aggregation myagg = (Aggregation)agg; - long[] vector = inputVector.vector; - - if (inputVector.isRepeating) { - if (inputVector.noNulls) { - myagg.value += vector[0]*batchSize; - } - return; - } + long[] vector = inputVector.vector; - if (!batch.selectedInUse && inputVector.noNulls) { - iterateNoSelectionNoNulls(myagg, vector, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull); - } - else if (inputVector.noNulls){ - iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected); - } - else { - iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); + if (inputVector.isRepeating) { + if (inputVector.noNulls || 
!inputVector.isNull[0]) { + myagg.value += vector[0]*batchSize; } + return; } - - private void iterateSelectionHasNulls( - Aggregation myagg, - long[] vector, - int batchSize, - boolean[] isNull, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - int i = selected[j]; - if (!isNull[i]) { - myagg.value += vector[i]; - } - } + + if (!batch.selectedInUse && inputVector.noNulls) { + iterateNoSelectionNoNulls(myagg, vector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull); + } + else if (inputVector.noNulls){ + iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); } + } - private void iterateSelectionNoNulls( - Aggregation myagg, - long[] vector, - int batchSize, - int[] selected) { + private void iterateSelectionHasNulls( + Aggregation myagg, + long[] vector, + int batchSize, + boolean[] isNull, + int[] selected) { - for (int i=0; i< batchSize; ++i) { - myagg.value += vector[selected[i]]; + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + myagg.value += vector[i]; } } + } - private void iterateNoSelectionHasNulls( - Aggregation myagg, - long[] vector, - int batchSize, - boolean[] isNull) { - - for(int i=0;i newVectorReducer; try { @@ -4102,9 +4102,6 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { for (int i = 0; i < size; i++) { ExprNodeDesc expr = colList.get(i); VectorExpression ve = vContext.getVectorExpression(expr); - if (ve.getOutputColumnNum() == -1) { - fake++; - } projectedOutputColumns[i] = ve.getOutputColumnNum(); if (ve instanceof IdentityExpression) { // Suppress useless evaluation. diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java index acb3198..f2adc08 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java @@ -341,8 +341,7 @@ private void longColAddLongColumnUtil(boolean isChecked) { lcv1.noNulls = true; lcv0.isRepeating = false; lcv1.isRepeating = false; - lcv2.noNulls = false; // set output noNulls to true to make sure it gets over-written - lcv2.isRepeating = true; // similarly with isRepeating + lcv2.reset(); expr.evaluate(vrg); assertTrue(lcv2.noNulls); assertFalse(lcv2.isRepeating); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java index c646bf1..ea19e93 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java @@ -188,7 +188,6 @@ public void testLongColumnColumnIfExpr() { assertEquals(2, r.vector[1]); assertEquals(-3, r.vector[2]); assertEquals(-4, r.vector[3]); - assertEquals(true, r.noNulls); assertEquals(false, r.isRepeating); // verify when first argument (boolean flags) is repeating @@ -230,7 +229,6 @@ public void testLongColumnColumnIfExpr() { assertEquals(2, r.vector[1]); assertEquals(3, r.vector[2]); assertEquals(-4, r.vector[3]); - assertEquals(true, r.noNulls); assertEquals(false, 
r.isRepeating); // test when second argument has nulls @@ -308,7 +306,6 @@ public void testDoubleColumnColumnIfExpr() { assertEquals(true, 2d == r.vector[1]); assertEquals(true, -3d == r.vector[2]); assertEquals(true, -4d == r.vector[3]); - assertEquals(true, r.noNulls); assertEquals(false, r.isRepeating); } @@ -480,7 +477,6 @@ public void testIfExprStringColumnStringScalar() { assertTrue(getString(r, 1).equals("scalar")); assertTrue(getString(r, 2).equals("arg2_2")); assertTrue(getString(r, 3).equals("arg2_3")); - assertTrue(r.noNulls); // test for null input strings batch = getBatch1Long3BytesVectors(); @@ -504,7 +500,6 @@ public void testIfExprStringScalarStringColumn() { assertTrue(getString(r, 1).equals("arg3_1")); assertTrue(getString(r, 2).equals("scalar")); assertTrue(getString(r, 3).equals("scalar")); - assertTrue(r.noNulls); // test for null input strings batch = getBatch1Long3BytesVectors(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java index bd5a6b7..a60b9e4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java @@ -67,7 +67,6 @@ public void testLongColOrLongCol() { expr.evaluate(batch); // spot check - Assert.assertTrue(outCol.noNulls); Assert.assertEquals(0, outCol.vector[0]); Assert.assertEquals(1, outCol.vector[1]); Assert.assertEquals(1, outCol.vector[2]); @@ -125,7 +124,6 @@ public void testLongColAndLongCol() { expr.evaluate(batch); // spot check - Assert.assertTrue(outCol.noNulls); Assert.assertEquals(0, outCol.vector[0]); Assert.assertEquals(0, outCol.vector[1]); Assert.assertEquals(0, outCol.vector[2]); @@ -207,7 +205,6 @@ public void testBooleanNot() { batch.cols[0].noNulls = true; expr.evaluate(batch); Assert.assertFalse(outCol.isRepeating); - Assert.assertTrue(outCol.noNulls); Assert.assertEquals(1, outCol.vector[0]); Assert.assertEquals(0, outCol.vector[2]); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java index ca3c259..202f18c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java @@ -3757,7 +3757,6 @@ public void testStringColCompareStringColProjection() { expr.evaluate(batch); Assert.assertEquals(4, batch.size); outVector = ((LongColumnVector) batch.cols[3]).vector; - Assert.assertFalse(batch.cols[3].noNulls); Assert.assertFalse(batch.cols[3].isNull[0]); Assert.assertEquals(1, outVector[0]); Assert.assertFalse(batch.cols[3].isNull[1]); @@ -3821,7 +3820,6 @@ public void testStringColCompareStringColProjection() { expr.evaluate(batch); outVector = ((LongColumnVector) batch.cols[3]).vector; Assert.assertEquals(4, batch.size); - Assert.assertFalse(batch.cols[3].noNulls); Assert.assertFalse(batch.cols[3].isNull[0]); Assert.assertEquals(1, outVector[0]); Assert.assertFalse(batch.cols[3].isNull[1]); @@ -4064,7 +4062,6 @@ public void testColLower() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatchMixedCase(); @@ -4124,7 +4121,6 
@@ public void testStringLength() { expr.evaluate(batch); outCol = (LongColumnVector) batch.cols[1]; Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); Assert.assertEquals(7, outCol.vector[0]); // length of "mixedUp" // no nulls, is repeating @@ -4486,7 +4482,6 @@ public void testColConcatStringScalar() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4549,7 +4544,6 @@ public void testColConcatCharScalar() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4612,7 +4606,6 @@ public void testColConcatVarCharScalar() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4675,7 +4668,6 @@ public void testStringScalarConcatCol() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4738,7 +4730,6 @@ public void testCharScalarConcatCol() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4801,7 +4792,6 @@ public void testVarCharScalarConcatCol() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4923,7 +4913,6 @@ public void testColConcatCol() { batch.cols[0].noNulls = true; expr.evaluate(batch); Assert.assertEquals(false, outCol.isRepeating); - Assert.assertEquals(true, outCol.noNulls); cmp = StringExpr.compare(red, 0, red.length, outCol.vector[2], outCol.start[2], outCol.length[2]); Assert.assertEquals(0, cmp); @@ -5015,7 +5004,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5043,7 +5031,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr = new StringSubstrColStart(0, 1, 1); expr.evaluate(batch); Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, @@ -5128,7 +5115,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // 3nd char starts from index 3 and total length should be 7 bytes as max is 10 @@ -5153,7 +5139,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr = new StringSubstrColStart(0, 2, 1); expr.evaluate(batch); Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // the result is the last 1 character, which occupies 4 bytes @@ -5190,7 +5175,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr.evaluate(batch); 
BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); byte[] expected = "string".getBytes("UTF-8"); Assert.assertEquals(0, @@ -5218,7 +5202,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr = new StringSubstrColStartLen(0, -6, 6, 1); expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(3, batch.size); @@ -5250,7 +5233,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { outCol = (BytesColumnVector) batch.cols[1]; expr.evaluate(batch); Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5280,7 +5262,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { outCol = (BytesColumnVector) batch.cols[1]; expr.evaluate(batch); Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5310,7 +5291,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5391,7 +5371,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr.evaluate(batch); Assert.assertEquals(1, batch.size); Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // 3rd char starts at index 3, and with length 2 it is covering the rest of the array. @@ -5415,7 +5394,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(1, batch.size); Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // 2nd substring index refers to the 6th index (last char in the array) diff --git ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q index 449bea2..ab65e9d 100644 --- ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q +++ ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q @@ -1,4 +1,4 @@ -SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=none; SET hive.auto.convert.join=true; @@ -10,6 +10,9 @@ create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20) insert overwrite table varchar_udf_1 select key, value, key, value from src where key = '238' limit 1; +-- Add a single NULL row that will come from ORC as isRepeated. +insert into varchar_udf_1 values (NULL, NULL, NULL, NULL); + DROP TABLE IF EXISTS DECIMAL_UDF_txt; DROP TABLE IF EXISTS DECIMAL_UDF; @@ -25,11 +28,17 @@ STORED AS ORC; INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt; +-- Add a single NULL row that will come from ORC as isRepeated. 
+insert into DECIMAL_UDF values (NULL, NULL); + drop table if exists count_case_groupby; create table count_case_groupby (key string, bool boolean) STORED AS orc; insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL); +-- Add a single NULL row that will come from ORC as isRepeated. +insert into table count_case_groupby values (NULL, NULL); + set hive.vectorized.adaptor.usage.mode=none; explain vectorization expression @@ -37,39 +46,39 @@ select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1; +from varchar_udf_1; select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1; +from varchar_udf_1; explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1; +from varchar_udf_1; select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1; +from varchar_udf_1; explain vectorization expression select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1; +from varchar_udf_1; select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1; +from varchar_udf_1; set hive.vectorized.adaptor.usage.mode=chosen; @@ -79,39 +88,39 @@ select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1; +from varchar_udf_1; select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1; +from varchar_udf_1; explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1; +from varchar_udf_1; select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1; +from varchar_udf_1; explain vectorization expression select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1; +from varchar_udf_1; select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1; +from varchar_udf_1; set hive.vectorized.adaptor.usage.mode=none; diff --git ql/src/test/queries/clientpositive/vector_between_columns.q ql/src/test/queries/clientpositive/vector_between_columns.q index 5a2714e..a8e9ca4 100644 --- ql/src/test/queries/clientpositive/vector_between_columns.q +++ ql/src/test/queries/clientpositive/vector_between_columns.q @@ -22,8 +22,13 @@ load data local inpath '../../data/files/TINT' into table TINT_txt; create table TSINT stored as orc AS SELECT * FROM TSINT_txt; +-- Add a single NULL row that will come from ORC as isRepeated. 
+insert into TSINT values (NULL, NULL); + create table TINT stored as orc AS SELECT * FROM TINT_txt; +-- Add a single NULL row that will come from ORC as isRepeated. +insert into TINT values (NULL, NULL); explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint; diff --git ql/src/test/queries/clientpositive/vector_between_in.q ql/src/test/queries/clientpositive/vector_between_in.q index c336829..b87d646 100644 --- ql/src/test/queries/clientpositive/vector_between_in.q +++ ql/src/test/queries/clientpositive/vector_between_in.q @@ -5,6 +5,9 @@ set hive.fetch.task.conversion=none; CREATE TABLE decimal_date_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, CAST(CAST((CAST(cint AS BIGINT) *ctinyint) AS TIMESTAMP) AS DATE) AS cdate FROM alltypesorc ORDER BY cdate; +-- Add a single NULL row that will come from ORC as isRepeated. +insert into decimal_date_test values (NULL, NULL, NULL, NULL); + EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate; EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)); diff --git ql/src/test/queries/clientpositive/vector_bround.q ql/src/test/queries/clientpositive/vector_bround.q index ec192bf..0bada68 100644 --- ql/src/test/queries/clientpositive/vector_bround.q +++ ql/src/test/queries/clientpositive/vector_bround.q @@ -14,6 +14,9 @@ values (2.51, 1.251), (3.51, 1.351); +-- Add a single NULL row that will come from ORC as isRepeated. +insert into test_vector_bround values (NULL, NULL); + set hive.vectorized.execution.enabled=true; explain vectorization detail diff --git ql/src/test/queries/clientpositive/vector_char_2.q ql/src/test/queries/clientpositive/vector_char_2.q index 5520ddd..d3a2e93 100644 --- ql/src/test/queries/clientpositive/vector_char_2.q +++ ql/src/test/queries/clientpositive/vector_char_2.q @@ -12,6 +12,9 @@ create table char_2 ( insert overwrite table char_2 select * from src; +-- Add a single NULL row that will come from ORC as isRepeated. +insert into char_2 values (NULL, NULL); + select value, sum(cast(key as int)), count(*) numrows from src group by value diff --git ql/src/test/queries/clientpositive/vector_coalesce_2.q ql/src/test/queries/clientpositive/vector_coalesce_2.q index ea45ddd..a264edd 100644 --- ql/src/test/queries/clientpositive/vector_coalesce_2.q +++ ql/src/test/queries/clientpositive/vector_coalesce_2.q @@ -1,12 +1,15 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; -SET hive.vectorized.execution.enabled=false; +SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; create table str_str_orc (str1 string, str2 string) stored as orc; insert into table str_str_orc values (null, "X"), ("0", "X"), ("1", "X"), (null, "y"); +-- Add a single NULL row that will come from ORC as isRepeated. 
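The str_str_orc setup above feeds the COALESCE tests, where per-row null flags actually steer which input supplies the output value. A rough sketch of the per-row rule being exercised (an illustrative loop under assumed simplifications, not Hive's actual VectorCoalesce, which additionally handles isRepeating inputs and selected-row subsets):

```java
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

// Illustrative only: per-row COALESCE(a, b) over two long columns.
// Shows just the null-flag rule; real code also handles isRepeating
// and batch.selected.
public class CoalesceSketch {
  static void coalesce(LongColumnVector a, LongColumnVector b,
                       LongColumnVector out, int n) {
    out.noNulls = true;
    for (int i = 0; i < n; i++) {
      if (a.noNulls || !a.isNull[i]) {        // first non-null wins
        out.vector[i] = a.vector[i];
        out.isNull[i] = false;
      } else if (b.noNulls || !b.isNull[i]) {
        out.vector[i] = b.vector[i];
        out.isNull[i] = false;
      } else {                                // both NULL: output NULL
        out.isNull[i] = true;
        out.noNulls = false;
      }
    }
  }
}
```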
+insert into str_str_orc values (NULL, NULL); + EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result diff --git ql/src/test/queries/clientpositive/vector_coalesce_3.q ql/src/test/queries/clientpositive/vector_coalesce_3.q index e3d9f0a..7d5e82c 100644 --- ql/src/test/queries/clientpositive/vector_coalesce_3.q +++ ql/src/test/queries/clientpositive/vector_coalesce_3.q @@ -11,7 +11,12 @@ CREATE TABLE test_2 (member BIGINT) STORED AS ORC; INSERT INTO test_1 VALUES (3,1),(2,2); INSERT INTO test_2 VALUES (1),(2),(3),(4); -EXPLAIN +-- Add a single NULL row that will come from ORC as isRepeated. +insert into test_1 values (NULL, NULL); +insert into test_2 values (NULL); + + +EXPLAIN VECTORIZATION DETAIL SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member; diff --git ql/src/test/queries/clientpositive/vector_data_types.q ql/src/test/queries/clientpositive/vector_data_types.q index 240fc89..b796e71 100644 --- ql/src/test/queries/clientpositive/vector_data_types.q +++ ql/src/test/queries/clientpositive/vector_data_types.q @@ -37,6 +37,9 @@ STORED AS ORC; INSERT INTO TABLE over1korc SELECT * FROM over1k; +-- Add a single NULL row that will come from ORC as isRepeated. +insert into over1korc values (NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL); + SET hive.vectorized.execution.enabled=false; EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i LIMIT 20; @@ -52,5 +55,9 @@ EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, `dec`, bin SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i LIMIT 20; +EXPLAIN VECTORIZATION EXPRESSION +SELECT SUM(HASH(*)) +FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q; + SELECT SUM(HASH(*)) FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q; diff --git ql/src/test/queries/clientpositive/vector_date_1.q ql/src/test/queries/clientpositive/vector_date_1.q index 0055973..bb515b1 100644 --- ql/src/test/queries/clientpositive/vector_date_1.q +++ ql/src/test/queries/clientpositive/vector_date_1.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; drop table if exists vector_date_1; create table vector_date_1 (dt1 date, dt2 date) stored as orc; @@ -13,8 +14,10 @@ insert into table vector_date_1 insert into table vector_date_1 select date '2001-01-01', date '2001-06-01' from src limit 1; +select * from vector_date_1 order by dt1, dt2; + -- column-to-column comparison in select clause -explain +explain vectorization detail select dt1, dt2, -- should be all true @@ -41,7 +44,7 @@ select dt2 > dt1 from vector_date_1 order by dt1; -explain +explain vectorization detail select dt1, dt2, -- should be all false @@ -69,7 +72,7 @@ select from vector_date_1 order by dt1; -- column-to-literal/literal-to-column comparison in select clause -explain +explain vectorization detail select dt1, -- should be all true @@ -96,7 +99,7 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1; -explain +explain vectorization detail select dt1, -- should all be false @@ -126,7 +129,7 @@ from vector_date_1 order by dt1; -- column-to-column comparisons in predicate -- all rows with non-null dt1 should be returned -explain 
+explain vectorization detail select dt1, dt2 from vector_date_1 @@ -153,7 +156,7 @@ order by dt1; -- column-to-literal/literal-to-column comparison in predicate -- only a single row should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -182,7 +185,7 @@ where and date '1970-01-01' <= dt1 order by dt1; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01'); SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01'); diff --git ql/src/test/queries/clientpositive/vector_decimal_1.q ql/src/test/queries/clientpositive/vector_decimal_1.q index 321275f..d47de3a 100644 --- ql/src/test/queries/clientpositive/vector_decimal_1.q +++ ql/src/test/queries/clientpositive/vector_decimal_1.q @@ -11,6 +11,9 @@ desc decimal_1; insert overwrite table decimal_1 select cast('17.29' as decimal(4,2)), 3.1415926BD, 3115926.54321BD from src tablesample (1 rows); + +-- Add a single NULL row that will come from ORC as isRepeated. +insert into decimal_1 values (NULL, NULL, NULL); explain vectorization detail select cast(t as boolean) from decimal_1 order by t; diff --git ql/src/test/queries/clientpositive/vector_decimal_aggregate.q ql/src/test/queries/clientpositive/vector_decimal_aggregate.q index 6fbf4ba..c3a8318 100644 --- ql/src/test/queries/clientpositive/vector_decimal_aggregate.q +++ ql/src/test/queries/clientpositive/vector_decimal_aggregate.q @@ -1,11 +1,15 @@ set hive.explain.user=false; set hive.fetch.task.conversion=none; +set hive.stats.column.autogather=true; CREATE TABLE decimal_vgby STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, cint FROM alltypesorc; + +-- Add a single NULL row that will come from ORC as isRepeated. +insert into decimal_vgby values (NULL, NULL, NULL, NULL); SET hive.vectorized.execution.enabled=true; @@ -49,6 +53,9 @@ CREATE TABLE decimal_vgby_small STORED AS TEXTFILE AS cint FROM alltypesorc; +-- Add a single NULL row that will come from ORC as isRepeated. +insert into decimal_vgby_small values (NULL, NULL, NULL, NULL); + EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), @@ -63,6 +70,13 @@ SELECT cint, GROUP BY cint HAVING COUNT(*) > 1; +SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q; + -- Now add the others... 
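The SUM(HASH(*)) wrappers added above, and repeated in the decimal tests that follow, make unordered results comparable: because addition is commutative, a sum of per-row hashes yields the same checksum regardless of row order, so the .q.out stays stable without an ORDER BY while still covering every row. A toy Java illustration (java.util.Objects.hash stands in for Hive's HASH UDF, which uses a different hash function):

```java
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

// Toy demo: summing per-row hashes is order-independent, so two result
// sets with the same rows in different orders produce the same checksum.
public class SumHashSketch {
  static long sumHash(List<Object[]> rows) {
    return rows.stream().mapToLong(r -> Objects.hash(r)).sum();
  }

  public static void main(String[] args) {
    List<Object[]> a = Arrays.asList(new Object[]{1, "x"}, new Object[]{2, "y"});
    List<Object[]> b = Arrays.asList(new Object[]{2, "y"}, new Object[]{1, "x"});
    System.out.println(sumHash(a) == sumHash(b)); // true
  }
}
```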
EXPLAIN VECTORIZATION DETAIL SELECT cint, @@ -77,3 +91,10 @@ SELECT cint, FROM decimal_vgby_small GROUP BY cint HAVING COUNT(*) > 1; + +SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q; diff --git ql/src/test/queries/clientpositive/vector_decimal_expressions.q ql/src/test/queries/clientpositive/vector_decimal_expressions.q index 0b41eec..6790f27 100644 --- ql/src/test/queries/clientpositive/vector_decimal_expressions.q +++ ql/src/test/queries/clientpositive/vector_decimal_expressions.q @@ -1,10 +1,16 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.fetch.task.conversion=none; +set hive.stats.column.autogather=false; -- SORT_QUERY_RESULTS -CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc; +CREATE TABLE decimal_test (cdouble double,cdecimal1 DECIMAL(20,10), cdecimal2 DECIMAL(23,14)) STORED AS ORC; + +-- Add a single NULL row that will come from ORC as isRepeated. +insert into decimal_test values (NULL, NULL, NULL); + +INSERT INTO TABLE decimal_test SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc; SET hive.vectorized.execution.enabled=true; @@ -17,6 +23,10 @@ SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1 ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10; +SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q; + -- DECIMAL_64 CREATE TABLE decimal_test_small STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(10,3)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(7,2)) AS cdecimal2 FROM alltypesorc; @@ -29,3 +39,7 @@ LIMIT 10; SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test_small WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14 LIMIT 10; + +SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as 
c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test_small WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q; diff --git ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q index ee9f333..4264937 100644 --- ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q +++ ql/src/test/queries/clientpositive/vector_decimal_math_funcs.q @@ -4,6 +4,9 @@ set hive.fetch.task.conversion=none; CREATE TABLE decimal_test STORED AS ORC AS SELECT cbigint, cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc; +-- Add a single NULL row that will come from ORC as isRepeated. +insert into decimal_test values (NULL, NULL, NULL, NULL); + SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vector_decimal_udf.q ql/src/test/queries/clientpositive/vector_decimal_udf.q index 13e5686..091f502 100644 --- ql/src/test/queries/clientpositive/vector_decimal_udf.q +++ ql/src/test/queries/clientpositive/vector_decimal_udf.q @@ -3,6 +3,8 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=minimal; +-- SORT_QUERY_RESULTS + DROP TABLE IF EXISTS DECIMAL_UDF_txt; DROP TABLE IF EXISTS DECIMAL_UDF; @@ -18,6 +20,10 @@ STORED AS ORC; INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt; +-- Add a single NULL row that will come from ORC as isRepeated. +insert into DECIMAL_UDF values (NULL, NULL); + + -- addition EXPLAIN VECTORIZATION DETAIL SELECT key + key FROM DECIMAL_UDF; @@ -75,13 +81,13 @@ SELECT key * '2.0' FROM DECIMAL_UDF; -- division EXPLAIN VECTORIZATION DETAIL -SELECT key / 0 FROM DECIMAL_UDF limit 1; -SELECT key / 0 FROM DECIMAL_UDF limit 1; +SELECT key / 0 FROM DECIMAL_UDF; +SELECT key / 0 FROM DECIMAL_UDF; -- Output not stable. -- EXPLAIN VECTORIZATION DETAIL --- SELECT key / NULL FROM DECIMAL_UDF limit 1; --- SELECT key / NULL FROM DECIMAL_UDF limit 1; +-- SELECT key / NULL FROM DECIMAL_UDF; +-- SELECT key / NULL FROM DECIMAL_UDF; EXPLAIN VECTORIZATION DETAIL SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0; @@ -182,6 +188,9 @@ STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DECIMAL_UDF_txt_small; +-- Add a single NULL row. +insert into DECIMAL_UDF_txt_small values (NULL, NULL); + -- addition EXPLAIN VECTORIZATION DETAIL SELECT key + key FROM DECIMAL_UDF_txt_small; @@ -239,13 +248,13 @@ SELECT key * '2.0' FROM DECIMAL_UDF_txt_small; -- division EXPLAIN VECTORIZATION DETAIL -SELECT key / 0 FROM DECIMAL_UDF_txt_small limit 1; -SELECT key / 0 FROM DECIMAL_UDF_txt_small limit 1; +SELECT key / 0 FROM DECIMAL_UDF_txt_small; +SELECT key / 0 FROM DECIMAL_UDF_txt_small; -- Output not stable. 
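Removing limit 1 from the division queries matters because the divide-by-zero path now runs over the whole table, including the repeating-null row. The semantics under test: in HiveQL, key / 0 evaluates to NULL rather than raising an error, so a vectorized divide must null out the offending rows. A hedged sketch of that convention (hypothetical method using the real ColumnVector fields; the generated Hive expressions also go through NullUtil and the isRepeating/selected fast paths):

```java
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;

// Hypothetical sketch: column / column divide where a zero divisor
// produces SQL NULL for that row rather than an exception.
public class Div0Sketch {
  static void divide(DoubleColumnVector num, DoubleColumnVector denom,
                     DoubleColumnVector out, int n) {
    for (int i = 0; i < n; i++) {
      if (denom.vector[i] == 0.0) {
        out.isNull[i] = true;   // x / 0 is NULL in HiveQL
        out.noNulls = false;
      } else {
        out.vector[i] = num.vector[i] / denom.vector[i];
        out.isNull[i] = false;
      }
    }
  }
}
```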
-- EXPLAIN VECTORIZATION DETAIL --- SELECT key / NULL FROM DECIMAL_UDF_txt_small limit 1; --- SELECT key / NULL FROM DECIMAL_UDF_txt_small limit 1; +-- SELECT key / NULL FROM DECIMAL_UDF_txt_small; +-- SELECT key / NULL FROM DECIMAL_UDF_txt_small; EXPLAIN VECTORIZATION DETAIL SELECT key / key FROM DECIMAL_UDF_txt_small WHERE key is not null and key <> 0; diff --git ql/src/test/queries/clientpositive/vector_decimal_udf2.q ql/src/test/queries/clientpositive/vector_decimal_udf2.q index 540fb7b..e10b7d1 100644 --- ql/src/test/queries/clientpositive/vector_decimal_udf2.q +++ ql/src/test/queries/clientpositive/vector_decimal_udf2.q @@ -18,6 +18,9 @@ STORED AS ORC; INSERT OVERWRITE TABLE DECIMAL_UDF2 SELECT * FROM DECIMAL_UDF2_txt; +-- Add a single NULL row that will come from ORC as isRepeated. +insert into DECIMAL_UDF2 values (NULL, NULL); + EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10; @@ -25,6 +28,10 @@ FROM DECIMAL_UDF2 WHERE key = 10; SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10; +SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2) q; + EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), @@ -38,6 +45,13 @@ SELECT log10(key), sqrt(key) FROM DECIMAL_UDF2 WHERE key = 10; +SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2) q; + -- DECIMAL_64 EXPLAIN VECTORIZATION DETAIL @@ -47,6 +61,10 @@ FROM DECIMAL_UDF2_txt WHERE key = 10; SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2_txt WHERE key = 10; +SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2_txt) q; + EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), @@ -60,5 +78,12 @@ SELECT log10(key), sqrt(key) FROM DECIMAL_UDF2_txt WHERE key = 10; +SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2_txt) q; + DROP TABLE IF EXISTS DECIMAL_UDF2_txt; DROP TABLE IF EXISTS DECIMAL_UDF2; diff --git ql/src/test/queries/clientpositive/vector_interval_1.q ql/src/test/queries/clientpositive/vector_interval_1.q index f4f0024..3702734 100644 --- ql/src/test/queries/clientpositive/vector_interval_1.q +++ ql/src/test/queries/clientpositive/vector_interval_1.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; drop table if exists vector_interval_1; create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 string) stored as orc; @@ -11,6 +12,8 @@ insert into vector_interval_1 insert into vector_interval_1 select null, null, null, null from src limit 1; +select * from vector_interval_1; + -- constants/cast from string explain vectorization expression select diff --git ql/src/test/queries/clientpositive/vector_ptf_part_simple.q ql/src/test/queries/clientpositive/vector_ptf_part_simple.q index fc9f9eb..5615bca 100644 --- ql/src/test/queries/clientpositive/vector_ptf_part_simple.q +++ ql/src/test/queries/clientpositive/vector_ptf_part_simple.q @@ -531,19 +531,37 @@ count(*) over(partition by p_mfgr order by p_name) as cs from 
vector_ptf_part_simple_orc; +explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc; + select p_mfgr, p_retailprice, rank() over(partition by p_mfgr) as r from vector_ptf_part_simple_orc; +explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc; + select p_mfgr, p_retailprice, rank() over(partition by p_mfgr order by p_name) as r from vector_ptf_part_simple_orc; +explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc; select p_mfgr, p_name, p_retailprice, rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r from vector_ptf_part_simple_orc; +explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc; select p_mfgr, p_name, p_retailprice, rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r diff --git ql/src/test/queries/clientpositive/vector_udf2.q ql/src/test/queries/clientpositive/vector_udf2.q index e62af6a..bd8e875 100644 --- ql/src/test/queries/clientpositive/vector_udf2.q +++ ql/src/test/queries/clientpositive/vector_udf2.q @@ -33,10 +33,16 @@ create temporary table HIVE_14349 (a string) stored as orc; insert into HIVE_14349 values('XYZa'), ('badXYZa'); +explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a%'; + select * from HIVE_14349 where a LIKE 'XYZ%a%'; insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc'); +explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a_'; + select * from HIVE_14349 where a LIKE 'XYZ%a_'; drop table HIVE_14349; diff --git ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q index eeb5ab8..5c052a1 100644 --- ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q +++ ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q @@ -1,5 +1,5 @@ set hive.mapred.mode=nonstrict; -SET hive.vectorized.execution.enabled = true; +SET hive.vectorized.execution.enabled = false; SET hive.int.timestamp.conversion.in.seconds=false; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vectorization_div0.q ql/src/test/queries/clientpositive/vectorization_div0.q index d7b6c3c..26ff266 100644 --- ql/src/test/queries/clientpositive/vectorization_div0.q +++ ql/src/test/queries/clientpositive/vectorization_div0.q @@ -3,6 +3,8 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled = true; set hive.fetch.task.conversion=none; +-- SORT_QUERY_RESULTS + -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants explain vectorization expression select cint / 0, ctinyint / 0, cbigint / 0, cdouble / 0.0 from alltypesorc limit 100; diff --git ql/src/test/queries/clientpositive/vectorization_nested_udf.q ql/src/test/queries/clientpositive/vectorization_nested_udf.q index da8f99c..25a25df 100644 --- ql/src/test/queries/clientpositive/vectorization_nested_udf.q +++ 
ql/src/test/queries/clientpositive/vectorization_nested_udf.q @@ -1,5 +1,8 @@ +set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc; SELECT SUM(abs(ctinyint)) from alltypesorc; diff --git ql/src/test/queries/clientpositive/vectorized_case.q ql/src/test/queries/clientpositive/vectorized_case.q index 99d7cfc..8aad2b5 100644 --- ql/src/test/queries/clientpositive/vectorized_case.q +++ ql/src/test/queries/clientpositive/vectorized_case.q @@ -1,8 +1,8 @@ set hive.explain.user=false; set hive.fetch.task.conversion=none; -set hive.vectorized.execution.enabled = true -; -explain vectorization expression +set hive.vectorized.execution.enabled = true; + +explain vectorization detail select csmallint, case @@ -37,7 +37,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 ; -explain vectorization expression +explain vectorization detail select csmallint, case @@ -55,7 +55,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 ; -explain vectorization expression +explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -64,7 +64,7 @@ select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc; -explain vectorization expression +explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -79,19 +79,19 @@ CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC; INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0); --for length=3 -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; --for length=2 and the expr2 is null -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; --for length=2 and the expr3 is null -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1; @@ -102,19 +102,105 @@ CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC; INSERT INTO test_2 VALUES (3,1),(2,2),(1,3); --for length=3 -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2; SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2; --for length=2 and the expression2 is null -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; --for length=2 and the expression3 is null -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; + SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; -SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; \ No newline at end of file + +select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as
ceven +from alltypesorc) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a; + + +select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a; + + diff --git ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out index 9ce5e1e..d77684b 100644 --- ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out +++ ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out @@ -24,6 +24,18 @@ POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type: POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into varchar_udf_1 values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: insert into varchar_udf_1 values (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@varchar_udf_1 +POSTHOOK: Lineage: varchar_udf_1.c1 EXPRESSION [] +POSTHOOK: Lineage: varchar_udf_1.c2 EXPRESSION [] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [] PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF_txt PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF_txt @@ -74,6 +86,16 @@ POSTHOOK: Input: default@decimal_udf_txt POSTHOOK: Output: default@decimal_udf POSTHOOK: Lineage: decimal_udf.key SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:key, type:decimal(20,10), 
comment:null), ] POSTHOOK: Lineage: decimal_udf.value SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:value, type:int, comment:null), ] +PREHOOK: query: insert into DECIMAL_UDF values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_udf +POSTHOOK: query: insert into DECIMAL_UDF values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_udf +POSTHOOK: Lineage: decimal_udf.key EXPRESSION [] +POSTHOOK: Lineage: decimal_udf.value EXPRESSION [] PREHOOK: query: drop table if exists count_case_groupby PREHOOK: type: DROPTABLE POSTHOOK: query: drop table if exists count_case_groupby @@ -96,23 +118,33 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@count_case_groupby POSTHOOK: Lineage: count_case_groupby.bool SCRIPT [] POSTHOOK: Lineage: count_case_groupby.key SCRIPT [] +PREHOOK: query: insert into table count_case_groupby values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@count_case_groupby +POSTHOOK: query: insert into table count_case_groupby values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@count_case_groupby +POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [] +POSTHOOK: Lineage: count_case_groupby.key EXPRESSION [] PREHOOK: query: explain vectorization expression select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 +from varchar_udf_1 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 +from varchar_udf_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -127,33 +159,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs - Map Vectorization: - enabled: true - 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFRegExp(Column[c2], Const string val) because hive.vectorized.adaptor.usage.mode=none - vectorized: false Stage: Stage-0 Fetch Operator - limit: 1 + limit: -1 Processor Tree: ListSink @@ -161,7 +184,7 @@ PREHOOK: query: select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 +from varchar_udf_1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### @@ -169,28 +192,29 @@ POSTHOOK: query: select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 +from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### +NULL NULL NULL true true true PREHOOK: query: explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 +from varchar_udf_1 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 +from varchar_udf_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -205,33 +229,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 2 Data size: 744 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 744 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: SELECT operator: Could not vectorize 
expression (mode = PROJECTION): GenericUDFBridge ==> regexp_extract (Column[c2], Const string val_([0-9]+), Const int 1) because hive.vectorized.adaptor.usage.mode=none - vectorized: false Stage: Stage-0 Fetch Operator - limit: 1 + limit: -1 Processor Tree: ListSink @@ -239,7 +254,7 @@ PREHOOK: query: select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 +from varchar_udf_1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### @@ -247,28 +262,29 @@ POSTHOOK: query: select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 +from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238 238 true +NULL NULL NULL PREHOOK: query: explain vectorization expression select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1 +from varchar_udf_1 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1 +from varchar_udf_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -283,33 +299,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 2 Data size: 744 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 744 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: SELECT operator: Could not 
vectorize expression (mode = PROJECTION): GenericUDFBridge ==> regexp_replace (Column[c2], Const string val, Const string replaced) because hive.vectorized.adaptor.usage.mode=none - vectorized: false Stage: Stage-0 Fetch Operator - limit: 1 + limit: -1 Processor Tree: ListSink @@ -317,7 +324,7 @@ PREHOOK: query: select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1 +from varchar_udf_1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### @@ -325,28 +332,29 @@ POSTHOOK: query: select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1 +from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### +NULL NULL NULL replaced_238 replaced_238 true PREHOOK: query: explain vectorization expression select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 +from varchar_udf_1 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 +from varchar_udf_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -361,33 +369,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFRegExp(Column[c2], Const string val) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones - vectorized: false Stage: Stage-0 Fetch Operator - 
limit: 1 + limit: -1 Processor Tree: ListSink @@ -395,7 +394,7 @@ PREHOOK: query: select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 +from varchar_udf_1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### @@ -403,28 +402,29 @@ POSTHOOK: query: select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 +from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### +NULL NULL NULL true true true PREHOOK: query: explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 +from varchar_udf_1 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 +from varchar_udf_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -439,49 +439,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true + Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [5, 6, 9] - selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 5:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 6:string, StringGroupColEqualStringGroupColumn(col 7:string, col 8:string)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 7:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 8:string) -> 9:boolean - Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap + Statistics: Num rows: 2 Data size: 744 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 744 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
Stage: Stage-0
Fetch Operator
- limit: 1
+ limit: -1
Processor Tree:
ListSink
@@ -489,7 +464,7 @@ PREHOOK: query: select
regexp_extract(c2, 'val_([0-9]+)', 1),
regexp_extract(c4, 'val_([0-9]+)', 1),
regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
-from varchar_udf_1 limit 1
+from varchar_udf_1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
@@ -497,28 +472,29 @@ POSTHOOK: query: select
regexp_extract(c2, 'val_([0-9]+)', 1),
regexp_extract(c4, 'val_([0-9]+)', 1),
regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
-from varchar_udf_1 limit 1
+from varchar_udf_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
238 238 true
+NULL NULL NULL
PREHOOK: query: explain vectorization expression
select
regexp_replace(c2, 'val', 'replaced'),
regexp_replace(c4, 'val', 'replaced'),
regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
-from varchar_udf_1 limit 1
+from varchar_udf_1
PREHOOK: type: QUERY
POSTHOOK: query: explain vectorization expression
select
regexp_replace(c2, 'val', 'replaced'),
regexp_replace(c4, 'val', 'replaced'),
regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
-from varchar_udf_1 limit 1
+from varchar_udf_1
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -533,49 +509,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
+ Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [5, 6, 9]
- selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 5:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 6:string, StringGroupColEqualStringGroupColumn(col 7:string, col 8:string)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 7:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 8:string) -> 9:boolean
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 1
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized, llap
+ Statistics: Num rows: 2 Data size: 744 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 744 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
Stage: Stage-0
Fetch Operator
- limit: 1
+ limit: -1
Processor Tree:
ListSink
@@ -583,7 +534,7 @@ PREHOOK: query: select
regexp_replace(c2, 'val', 'replaced'),
regexp_replace(c4, 'val', 'replaced'),
regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
-from varchar_udf_1 limit 1
+from varchar_udf_1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
@@ -591,18 +542,19 @@ POSTHOOK: query: select
regexp_replace(c2, 'val', 'replaced'),
regexp_replace(c4, 'val', 'replaced'),
regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
-from varchar_udf_1 limit 1
+from varchar_udf_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1
#### A masked pattern was here ####
+NULL NULL NULL
replaced_238 replaced_238 true
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -617,26 +569,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: power(key, 2) (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFPower(Column[key], Const int 2) because hive.vectorized.adaptor.usage.mode=none
- vectorized: false
Stage: Stage-0
Fetch Operator
@@ -690,6 +636,7 @@ POSTHOOK: Input: default@decimal_udf
9.8596
9.8596
NULL
+NULL
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT
exp(key), ln(key),
@@ -705,8 +652,8 @@ SELECT
FROM DECIMAL_UDF WHERE key = 10
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -721,7 +668,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key = 10) (type: boolean)
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
@@ -738,12 +685,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> log (Column[value], Const decimal(20,10) 10) because hive.vectorized.adaptor.usage.mode=none
- vectorized: false
Stage: Stage-0
Fetch Operator
@@ -773,8 +714,8 @@ PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT POWER(key, 2) FROM DECIMAL_UDF
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -789,26 +730,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: power(key, 2) (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFPower(Column[key], Const int 2) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones
- vectorized: false
Stage: Stage-0
Fetch Operator
@@ -862,6 +797,7 @@ POSTHOOK: Input: default@decimal_udf
9.8596
9.8596
NULL
+NULL
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT
exp(key), ln(key),
@@ -877,8 +813,8 @@ SELECT
FROM DECIMAL_UDF WHERE key = 10
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -893,7 +829,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key = 10) (type: boolean)
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
@@ -910,12 +846,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: SELECT operator: Could not vectorize expression (mode = PROJECTION): GenericUDFBridge ==> log (Column[value], Const decimal(20,10) 10) because hive.vectorized.adaptor.usage.mode=chosen and the UDF wasn't one of the chosen ones
- vectorized: false
Stage: Stage-0
Fetch Operator
@@ -947,8 +877,8 @@ POSTHOOK: query: explain vectorization expression
SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -966,82 +896,37 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: count_case_groupby
- Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
+ Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6]
- selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int
- Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col1)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 6:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
- Execution mode: vectorized, llap
+ Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1061,6 +946,7 @@ POSTHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 EL
POSTHOOK: type: QUERY
POSTHOOK: Input: default@count_case_groupby
#### A masked pattern was here ####
+NULL 0
key1 1
key2 1
key3 0
@@ -1073,8 +959,8 @@ POSTHOOK: query: explain vectorization expression
SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -1092,82 +978,37 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: count_case_groupby
- Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
+ Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6]
- selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int
- Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col1)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 6:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
- Execution mode: vectorized, llap
+ Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1187,6 +1028,7 @@ POSTHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 EL
POSTHOOK: type: QUERY
POSTHOOK: Input: default@count_case_groupby
#### A masked pattern was here ####
+NULL 0
key1 1
key2 1
key3 0
diff --git ql/src/test/results/clientpositive/llap/vector_between_columns.q.out ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
index 48d5275..c85c59e 100644
--- ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
+++ ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
@@ -47,6 +47,17 @@ POSTHOOK: Output: default@TSINT
POSTHOOK: Lineage: tsint.csint SIMPLE [(tsint_txt)tsint_txt.FieldSchema(name:csint, type:smallint, comment:null), ]
POSTHOOK: Lineage: tsint.rnum SIMPLE [(tsint_txt)tsint_txt.FieldSchema(name:rnum, type:int, comment:null), ]
tsint_txt.rnum tsint_txt.csint
+PREHOOK: query: insert into TSINT values (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tsint
+POSTHOOK: query: insert into TSINT values (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tsint
+POSTHOOK: Lineage: tsint.csint EXPRESSION []
+POSTHOOK: Lineage: tsint.rnum EXPRESSION []
+_col0 _col1
PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@tint_txt
@@ -60,6 +71,17 @@ POSTHOOK: Output: default@TINT
POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, type:int, comment:null), ]
POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ]
tint_txt.rnum tint_txt.cint
+PREHOOK: query: insert into TINT values (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tint
+POSTHOOK: query: insert into TINT values (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tint
+POSTHOOK: Lineage: tint.cint EXPRESSION []
+POSTHOOK: Lineage: tint.rnum EXPRESSION []
+_col0 _col1
Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: explain vectorization expression
select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
@@ -88,7 +110,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tint
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -98,14 +120,14 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -122,7 +144,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tsint
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -132,14 +154,14 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: smallint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -162,14 +184,14 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -198,26 +220,37 @@ tint.rnum tsint.rnum tint.cint tsint.csint between_col
0 2 NULL 0 NoOk
0 3 NULL 1 NoOk
0 4 NULL 10 NoOk
+0 NULL NULL NULL NoOk
1 0 -1 NULL NoOk
1 1 -1 -1 Ok
1 2 -1 0 NoOk
1 3 -1 1 NoOk
1 4 -1 10 NoOk
+1 NULL -1 NULL NoOk
2 0 0 NULL NoOk
2 1 0 -1 NoOk
2 2 0 0 Ok
2 3 0 1 NoOk
2 4 0 10 NoOk
+2 NULL 0 NULL NoOk
3 0 1 NULL NoOk
3 1 1 -1 NoOk
3 2 1 0 NoOk
3 3 1 1 Ok
3 4 1 10 NoOk
+3 NULL 1 NULL NoOk
4 0 10 NULL NoOk
4 1 10 -1 NoOk
4 2 10 0 NoOk
4 3 10 1 NoOk
4 4 10 10 Ok
+4 NULL 10 NULL NoOk
+NULL 0 NULL NULL NoOk
+NULL 1 NULL -1 NoOk
+NULL 2 NULL 0 NoOk
+NULL 3 NULL 1 NoOk
+NULL 4 NULL 10 NoOk
+NULL NULL NULL NULL NoOk
Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: explain vectorization expression
select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
@@ -246,7 +279,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tint
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -256,14 +289,14 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -280,7 +313,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tsint
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -290,14 +323,14 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: smallint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -320,17 +353,17 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean)
- Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 68 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 68 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 68 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out
index e02f64c..f4f87ae 100644
--- ql/src/test/results/clientpositive/llap/vector_between_in.q.out
+++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out
@@ -12,6 +12,18 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc.
POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+PREHOOK: query: insert into decimal_date_test values (NULL, NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@decimal_date_test
+POSTHOOK: query: insert into decimal_date_test values (NULL, NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@decimal_date_test
+POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION []
+POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION []
+POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION []
+POSTHOOK: Lineage: decimal_date_test.cdouble EXPRESSION []
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
@@ -36,7 +48,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -131,7 +143,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -140,13 +152,13 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 5:boolean)
predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean)
- Statistics: Num rows: 12273 Data size: 653001 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12274 Data size: 653057 Basic stats: COMPLETE Column stats: NONE
Select Operator
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: []
- Statistics: Num rows: 12273 Data size: 653001 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12274 Data size: 653057 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -240,7 +252,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -335,7 +347,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -344,13 +356,13 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean)
predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean)
- Statistics: Num rows: 12273 Data size: 1306003 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12274 Data size: 1306115 Basic stats: COMPLETE Column stats: NONE
Select Operator
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: []
- Statistics: Num rows: 12273 Data size: 1306003 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12274 Data size: 1306115 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -444,7 +456,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -453,7 +465,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterLongColumnBetween(col 3:date, left -2, right 1)
predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean)
- Statistics: Num rows: 1365 Data size: 72626 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date)
outputColumnNames: _col0
@@ -461,7 +473,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 1365 Data size: 72626 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
@@ -469,7 +481,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1365 Data size: 72626 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -497,13 +509,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 1365 Data size: 72626 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1365 Data size: 72626 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -539,7 +551,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -548,7 +560,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterLongColumnNotBetween(col 3:date, left -610, right 608)
predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean)
- Statistics: Num rows: 10923 Data size: 581173 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date)
outputColumnNames: _col0
@@ -556,7 +568,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 10923 Data size: 581173 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
@@ -564,7 +576,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 10923 Data size: 581173 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -592,13 +604,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 10923 Data size: 581173 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 10923 Data size: 581173 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -634,7 +646,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -643,7 +655,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterDecimalColumnBetween(col 1:decimal(20,10), left -20, right 45.9918918919)
predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean)
- Statistics: Num rows: 1365 Data size: 145253 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 145254 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdecimal1 (type: decimal(20,10))
outputColumnNames: _col0
@@ -651,7 +663,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1]
- Statistics: Num rows: 1365 Data size: 145253 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 145254 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: decimal(20,10))
sort order: +
@@ -659,7 +671,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1365 Data size: 145253 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 145254 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -687,13 +699,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 1365 Data size: 145253 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 145254 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1365 Data size: 145253 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 145254 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -729,7 +741,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -738,13 +750,13 @@ STAGE PLANS:
native: true
predicateExpression: FilterDecimalColumnNotBetween(col 1:decimal(20,10), left -2000, right 4390.1351351351)
predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean)
- Statistics: Num rows: 10923 Data size: 1162346 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 1162457 Basic stats: COMPLETE Column stats: NONE
Select Operator
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: []
- Statistics: Num rows: 10923 Data size: 1162346 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 1162457 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -1084,7 +1096,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -1095,7 +1107,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5]
selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 5:boolean
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -1109,7 +1121,7 @@ STAGE PLANS:
keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
@@ -1118,7 +1130,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -1153,7 +1165,7 @@ STAGE PLANS:
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 326900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 326901 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
@@ -1161,7 +1173,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6144 Data size: 326900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 326901 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
@@ -1179,13 +1191,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6144 Data size: 326900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 326901 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 6144 Data size: 326900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 326901 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1222,7 +1234,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -1233,7 +1245,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5]
selectExpressions: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -1247,7 +1259,7 @@ STAGE PLANS:
keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
@@ -1256,7 +1268,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -1291,7 +1303,7 @@ STAGE PLANS:
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 653802 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
@@ -1299,7 +1311,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6144 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 653802 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
@@ -1317,13 +1329,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6144 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 653802 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 6144 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 653802 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1360,7 +1372,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -1371,7 +1383,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5]
selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 5:boolean
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -1385,7 +1397,7 @@ STAGE PLANS:
keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
@@ -1394,7 +1406,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -1429,7 +1441,7 @@ STAGE PLANS:
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 326900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 326901 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
@@ -1437,7 +1449,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6144 Data size: 326900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 326901 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
@@ -1455,13 +1467,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6144 Data size: 326900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 326901 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 6144 Data size: 326900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 326901 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1498,7 +1510,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -1509,7 +1521,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5]
selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 5:boolean
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -1523,7 +1535,7 @@ STAGE PLANS:
keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
@@ -1532,7 +1544,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 1307600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -1567,7 +1579,7 @@ STAGE PLANS:
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 653802 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
@@ -1575,7 +1587,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6144 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 653802 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
@@ -1593,13 +1605,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6144 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 653802 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 6144 Data size: 653800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 653802 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1619,7 +1631,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_date_test
#### A masked pattern was here ####
-NULL 6230
+NULL 6231
false 6041
true 17
PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
@@ -1630,7 +1642,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_date_test
#### A masked pattern was here ####
-NULL 3114
+NULL 3115
false 9165
true 9
PREHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
@@ -1641,7 +1653,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_date_test
#### A masked pattern was here ####
-NULL 6230
+NULL 6231
false 5974
true 84
PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
@@ -1652,7 +1664,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AN
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_date_test
#### A masked pattern was here ####
-NULL 3114
+NULL 3115
false 3002
true 6172
PREHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
@@ -1663,7 +1675,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_date_test
#### A masked pattern was here ####
-NULL 6230
+NULL 6231
false 6041
true 17
PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
@@ -1674,7 +1686,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_date_test
#### A masked pattern was here ####
-NULL 3114
+NULL 3115
false 9165
true 9
PREHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
@@ -1685,7 +1697,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_date_test
#### A masked pattern was here ####
-NULL 6230
+NULL 6231
false 5974
true 84
PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0
@@ -1696,6 +1708,6 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AN
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_date_test
#### A masked pattern was here ####
-NULL 3114
+NULL 3115
false 3002
true 6172
diff --git ql/src/test/results/clientpositive/llap/vector_bround.q.out ql/src/test/results/clientpositive/llap/vector_bround.q.out
index 7b14a89..cfe90cb 100644
--- ql/src/test/results/clientpositive/llap/vector_bround.q.out
+++ ql/src/test/results/clientpositive/llap/vector_bround.q.out
@@ -34,6 +34,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_vector_bround
POSTHOOK: Lineage: test_vector_bround.v0 SCRIPT []
POSTHOOK: Lineage: test_vector_bround.v1 SCRIPT []
+PREHOOK: query: insert into test_vector_bround values (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_vector_bround
+POSTHOOK: query: insert into test_vector_bround values (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_vector_bround
+POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION []
+POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION []
PREHOOK: query: explain vectorization detail
select bround(v0), bround(v1, 1) from test_vector_bround
PREHOOK: type: QUERY
@@ -57,7 +67,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_vector_bround
- Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:v0:double, 1:v1:double, 2:ROW__ID:struct]
@@ -69,13 +79,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3, 4]
selectExpressions: FuncBRoundDoubleToDouble(col 0:double) -> 3:double, BRoundWithNumDigitsDoubleToDouble(col 1, decimalPlaces 1) -> 4:double
- Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -120,3 +130,4 @@ POSTHOOK: Input: default@test_vector_bround
3.0 1.3
3.0 1.3
4.0 1.4
+NULL NULL
diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out
index 827ec2e..9a43659 100644
--- ql/src/test/results/clientpositive/llap/vector_char_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out
@@ -26,6 +26,16 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@char_2
POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into char_2 values (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@char_2
+POSTHOOK: query: insert into char_2 values (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@char_2
+POSTHOOK: Lineage: char_2.key EXPRESSION []
+POSTHOOK: Lineage: char_2.value EXPRESSION []
PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
from src
group by value
@@ -80,7 +90,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: char_2
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -91,7 +101,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3]
selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count()
Group By Vectorization:
@@ -217,11 +227,11 @@ limit 5
POSTHOOK: type: QUERY
POSTHOOK: Input: default@char_2
#### A masked pattern was here ####
+NULL NULL 1
val_0 0 3
val_10 10 1
val_100 200 2
val_103 206 2
-val_104 208 2
PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
from src
group by value
@@ -276,7 +286,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: char_2
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -287,7 +297,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3]
selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count()
Group By Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
index e8151b7..a53ed39 100644
--- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
@@ -16,6 +16,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@str_str_orc
POSTHOOK: Lineage: str_str_orc.str1 SCRIPT []
POSTHOOK: Lineage: str_str_orc.str2 SCRIPT []
+PREHOOK: query: insert into str_str_orc values (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@str_str_orc
+POSTHOOK: query: insert into str_str_orc values (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@str_str_orc
+POSTHOOK: Lineage: str_str_orc.str1 EXPRESSION []
+POSTHOOK: Lineage: str_str_orc.str2 EXPRESSION []
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
from str_str_orc
GROUP BY str2
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -48,13 +58,28 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: str_str_orc
- Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 5]
+ selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int
+ Statistics: Num rows: 5 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 5:int) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 1:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -63,15 +88,42 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -79,9 +131,17 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2]
+ selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2)(children: DoubleColDivideDoubleScalar(col 2:double, val 60.0)(children: CastLongToDouble(col 1:bigint) -> 2:double) -> 3:double) -> 2:double
Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -109,6 +169,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@str_str_orc
#### A masked pattern was here ####
y 0.0
+NULL 0.0
X 0.02
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT COALESCE(str1, 0) as result
@@ -119,8 +180,8 @@ SELECT COALESCE(str1, 0) as result
from str_str_orc
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -135,20 +196,39 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: str_str_orc
- Statistics: Num rows: 4 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: COALESCE(str1,0) (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4]
+ selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string
+ Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -170,6 +250,7 @@ POSTHOOK: Input: default@str_str_orc
0
1
0
+0
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result
@@ -202,7 +283,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: str_str_orc
- Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 680 Basic stats: COMPLETE
Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -213,7 +294,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 5] selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int - Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) Group By Vectorization: @@ -313,6 +394,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### y 0.0 +NULL 0.0 X 0.02 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result @@ -339,7 +421,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -350,13 +432,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -393,3 +475,4 @@ POSTHOOK: Input: default@str_str_orc 0 1 0 +0 diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out index f149d7d..116f4d6 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -33,14 +33,37 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN +PREHOOK: query: insert into test_1 values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_1 +POSTHOOK: query: insert into test_1 values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_1 +POSTHOOK: Lineage: test_1.attr EXPRESSION [] +POSTHOOK: Lineage: test_1.member EXPRESSION [] +PREHOOK: query: insert into test_2 values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_2 +POSTHOOK: query: insert into test_2 values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_2 +POSTHOOK: Lineage: test_2.member EXPRESSION [] +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT m.member, (CASE WHEN 
COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -57,51 +80,118 @@ STAGE PLANS: Map Operator Tree: TableScan alias: m - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:member:bigint, 1:ROW__ID:struct] Select Operator expressions: member (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumnNums: [0] + bigTableRetainedColumnNums: [0] + bigTableValueColumnNums: [0] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 2] + smallTableMapping: [2] outputColumnNames: _col0, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + selectExpressions: IfExprColumnNull(col 3:boolean, col 2:bigint, null)(children: LongColGreaterLongScalar(col 4:bigint, val 1)(children: VectorCoalesce(columns [2, 3])(children: col 2:bigint, ConstantVectorExpression(val 5) -> 3:bigint) -> 4:bigint) -> 3:boolean, col 2:bigint) -> 4:bigint + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: 
member:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Map 2 Map Operator Tree: TableScan alias: n - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: member (type: bigint), attr (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -125,3 +215,4 @@ POSTHOOK: Input: default@test_2 2 2 3 NULL 4 NULL +NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_const.q.out ql/src/test/results/clientpositive/llap/vector_const.q.out new file mode 100644 index 0000000..964ddcc --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_const.q.out @@ -0,0 +1,66 @@ +PREHOOK: query: CREATE TEMPORARY TABLE varchar_const_1 (c1 int) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_const_1 +POSTHOOK: query: CREATE TEMPORARY TABLE varchar_const_1 (c1 int) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_const_1 +PREHOOK: query: INSERT INTO varchar_const_1 values(42) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@varchar_const_1 +POSTHOOK: query: INSERT INTO varchar_const_1 values(42) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@varchar_const_1 +POSTHOOK: Lineage: varchar_const_1.c1 SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + 
Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: varchar_const_1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'FF' (type: varchar(4)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_const_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_const_1 +#### A masked pattern was here #### +FF diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out index 06b50bb..961261d 100644 --- ql/src/test/results/clientpositive/llap/vector_data_types.q.out +++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out @@ -95,6 +95,25 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] +PREHOOK: query: insert into over1korc values (NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@over1korc +POSTHOOK: query: insert into over1korc values (NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@over1korc +POSTHOOK: Lineage: over1korc.b EXPRESSION [] +POSTHOOK: Lineage: over1korc.bin EXPRESSION [] +POSTHOOK: Lineage: over1korc.bo EXPRESSION [] +POSTHOOK: Lineage: over1korc.d EXPRESSION [] +POSTHOOK: Lineage: over1korc.dec EXPRESSION [] +POSTHOOK: Lineage: over1korc.f EXPRESSION [] +POSTHOOK: Lineage: over1korc.i EXPRESSION [] +POSTHOOK: Lineage: over1korc.s EXPRESSION [] +POSTHOOK: Lineage: over1korc.si EXPRESSION [] +POSTHOOK: Lineage: over1korc.t EXPRESSION [] +POSTHOOK: Lineage: over1korc.ts EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i LIMIT 20 @@ -119,15 +138,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si 
(type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: llap @@ -138,7 +157,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE @@ -164,6 +183,7 @@ POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 374 65560 4294967516 65.43 22.48 true oscar quirinius 2013-03-01 09:11:58.703316 16.86 mathematics NULL 409 65536 4294967490 46.97 25.92 false fred miller 2013-03-01 09:11:58.703116 33.45 history NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703111 18.80 mathematics @@ -183,7 +203,6 @@ NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703 -2 461 65648 4294967425 58.52 24.85 false rachel thompson 2013-03-01 09:11:58.703318 85.62 zync studies -1 268 65778 4294967418 56.33 44.73 true calvin falkner 2013-03-01 09:11:58.70322 7.37 history -1 281 65643 4294967323 15.1 45.0 false irene nixon 2013-03-01 09:11:58.703223 80.96 undecided --1 300 65663 4294967343 71.26 34.62 true calvin ovid 2013-03-01 09:11:58.703262 78.56 study skills PREHOOK: query: SELECT SUM(HASH(*)) FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q PREHOOK: type: QUERY @@ -219,7 +238,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -229,7 +248,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ @@ -237,7 +256,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized, llap @@ -267,7 +286,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -299,6 +318,7 @@ POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 374 65560 4294967516 65.43 22.48 true oscar quirinius 2013-03-01 09:11:58.703316 16.86 mathematics NULL 409 65536 4294967490 46.97 25.92 false fred miller 2013-03-01 09:11:58.703116 33.45 history NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703111 18.80 mathematics @@ -318,7 +338,115 @@ NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703 -2 461 65648 4294967425 58.52 24.85 false rachel thompson 2013-03-01 09:11:58.703318 85.62 zync studies -1 268 65778 4294967418 56.33 44.73 true calvin falkner 2013-03-01 09:11:58.70322 7.37 history -1 281 65643 4294967323 15.1 45.0 false irene nixon 2013-03-01 09:11:58.703223 80.96 undecided --1 300 65663 4294967343 71.26 34.62 true calvin ovid 2013-03-01 09:11:58.703262 78.56 study skills +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT SUM(HASH(*)) +FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT SUM(HASH(*)) +FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Select Operator + expressions: hash(t,si,i,b,f,d,bo,s,ts,dec,bin) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: 
true + projectedOutputColumnNums: [12] + selectExpressions: VectorUDFAdaptor(hash(t,si,i,b,f,d,bo,s,ts,dec,bin)) -> 12:int + Statistics: Num rows: 1050 Data size: 358026 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 12:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(HASH(*)) FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_date_1.q.out ql/src/test/results/clientpositive/llap/vector_date_1.q.out index 1e3d2b3..bacd667 100644 --- ql/src/test/results/clientpositive/llap/vector_date_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_date_1.q.out @@ -22,6 +22,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 EXPRESSION [] POSTHOOK: Lineage: vector_date_1.dt2 EXPRESSION [] +_col0 _col1 PREHOOK: query: insert into table vector_date_1 select date '1999-12-31', date '2000-01-01' from src limit 1 PREHOOK: type: QUERY @@ -34,6 +35,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: 
Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] +_c0 _c1 PREHOOK: query: insert into table vector_date_1 select date '2001-01-01', date '2001-06-01' from src limit 1 PREHOOK: type: QUERY @@ -46,7 +48,20 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] -PREHOOK: query: explain +_c0 _c1 +PREHOOK: query: select * from vector_date_1 order by dt1, dt2 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_date_1 order by dt1, dt2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +vector_date_1.dt1 vector_date_1.dt2 +NULL NULL +1999-12-31 2000-01-01 +2001-01-01 2001-06-01 +PREHOOK: query: explain vectorization detail select dt1, dt2, -- should be all true @@ -60,7 +75,7 @@ select dt2 > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all true @@ -74,6 +89,11 @@ select dt2 > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -91,26 +111,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 = dt1) (type: boolean), (dt1 <> dt2) (type: boolean), (dt1 <= dt1) (type: boolean), (dt1 <= dt2) (type: boolean), (dt1 < dt2) (type: boolean), (dt2 >= dt2) (type: boolean), (dt2 >= dt1) (type: boolean), (dt2 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: LongColEqualLongColumn(col 0:date, col 0:date) -> 3:boolean, LongColNotEqualLongColumn(col 0:date, col 1:date) -> 4:boolean, LongColLessEqualLongColumn(col 0:date, col 0:date) -> 5:boolean, LongColLessEqualLongColumn(col 0:date, col 1:date) -> 6:boolean, LongColLessLongColumn(col 0:date, col 1:date) -> 7:boolean, LongColGreaterEqualLongColumn(col 1:date, col 1:date) -> 8:boolean, LongColGreaterEqualLongColumn(col 1:date, col 0:date) -> 9:boolean, LongColGreaterLongColumn(col 1:date, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 
(type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:date, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean, VALUE._col8:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -153,10 +222,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 true true true true true true true true 2001-01-01 2001-06-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -170,7 +240,7 @@ select dt2 < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -184,6 +254,11 @@ select dt2 < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -201,26 +276,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 <> dt1) (type: boolean), (dt1 = dt2) (type: boolean), (dt1 < 
dt1) (type: boolean), (dt1 >= dt2) (type: boolean), (dt1 > dt2) (type: boolean), (dt2 > dt2) (type: boolean), (dt2 <= dt1) (type: boolean), (dt2 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: LongColNotEqualLongColumn(col 0:date, col 0:date) -> 3:boolean, LongColEqualLongColumn(col 0:date, col 1:date) -> 4:boolean, LongColLessLongColumn(col 0:date, col 0:date) -> 5:boolean, LongColGreaterEqualLongColumn(col 0:date, col 1:date) -> 6:boolean, LongColGreaterLongColumn(col 0:date, col 1:date) -> 7:boolean, LongColGreaterLongColumn(col 1:date, col 1:date) -> 8:boolean, LongColLessEqualLongColumn(col 1:date, col 0:date) -> 9:boolean, LongColLessLongColumn(col 1:date, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:date, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean, VALUE._col8:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 
4, 5, 6, 7, 8, 9] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -263,10 +387,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 false false false false false false false false 2001-01-01 2001-06-01 false false false false false false false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should be all true @@ -280,7 +405,7 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should be all true @@ -294,6 +419,11 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -311,26 +441,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), (dt1 <> 1970-01-01) (type: boolean), (dt1 >= 1970-01-01) (type: boolean), (dt1 > 1970-01-01) (type: boolean), (dt1 <= 2100-01-01) (type: boolean), (dt1 < 2100-01-01) (type: boolean), (1970-01-01 <> dt1) (type: boolean), (1970-01-01 <= dt1) (type: boolean), (1970-01-01 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: DateColNotEqualDateScalar(col 0:date, date 1970-01-01) -> 3:boolean, DateColGreaterEqualDateScalar(col 0:date, date 1970-01-01) -> 4:boolean, DateColGreaterDateScalar(col 0:date, date 1970-01-01) -> 5:boolean, DateColLessEqualDateScalar(col 0:date, date 2100-01-01) -> 6:boolean, DateColLessDateScalar(col 0:date, date 2100-01-01) -> 7:boolean, DateScalarNotEqualDateColumn(date 1970-01-01, col 0:date) -> 8:boolean, DateScalarLessEqualDateColumn(date 1970-01-01, col 0:date) -> 9:boolean, DateScalarLessDateColumn(date 1970-01-01, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: 
boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:boolean, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -373,10 +552,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 true true true true true true true true 2001-01-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -390,7 +570,7 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -404,6 +584,11 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -421,26 +606,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), (dt1 = 1970-01-01) (type: boolean), (dt1 <= 1970-01-01) (type: boolean), (dt1 < 1970-01-01) (type: boolean), (dt1 >= 2100-01-01) (type: boolean), (dt1 > 
2100-01-01) (type: boolean), (1970-01-01 = dt1) (type: boolean), (1970-01-01 >= dt1) (type: boolean), (1970-01-01 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: DateColEqualDateScalar(col 0:date, date 1970-01-01) -> 3:boolean, DateColLessEqualDateScalar(col 0:date, date 1970-01-01) -> 4:boolean, DateColLessDateScalar(col 0:date, date 1970-01-01) -> 5:boolean, DateColGreaterEqualDateScalar(col 0:date, date 2100-01-01) -> 6:boolean, DateColGreaterDateScalar(col 0:date, date 2100-01-01) -> 7:boolean, DateScalarEqualDateColumn(date 1970-01-01, col 0:date) -> 8:boolean, DateScalarGreaterEqualDateColumn(date 1970-01-01, col 0:date) -> 9:boolean, DateScalarGreaterDateColumn(date 1970-01-01, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:boolean, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 168 Basic 
stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -483,10 +717,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 false false false false false false false false 2001-01-01 false false false false false false false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -499,7 +734,7 @@ where and dt2 >= dt1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -512,6 +747,11 @@ where and dt2 >= dt1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -529,29 +769,81 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0:date, col 0:date), FilterLongColNotEqualLongColumn(col 0:date, col 1:date), FilterLongColLessLongColumn(col 0:date, col 1:date), FilterLongColLessEqualLongColumn(col 0:date, col 1:date), FilterLongColGreaterLongColumn(col 1:date, col 0:date), FilterLongColGreaterEqualLongColumn(col 1:date, col 0:date)) predicate: ((dt1 < dt2) and (dt1 <= dt2) and (dt1 <> dt2) and (dt1 = dt1) and (dt2 > dt1) and (dt2 >= dt1)) (type: boolean) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dt1 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 
Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -592,9 +884,10 @@ order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 1999-12-31 2000-01-01 2001-01-01 2001-06-01 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -609,7 +902,7 @@ where and date '1970-01-01' <= dt1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -624,6 +917,11 @@ where and date '1970-01-01' <= dt1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -638,15 +936,30 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDateScalarEqualDateColumn(val 11323, col 0:date), FilterDateColNotEqualDateScalar(col 0:date, val 0), FilterDateScalarNotEqualDateColumn(val 0, col 0:date)) predicate: ((1970-01-01 <> dt1) and (2001-01-01 = dt1) and (dt1 <> 1970-01-01)) (type: boolean) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 2001-01-01 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 1] + selectExpressions: ConstantVectorExpression(val 11323) -> 3:date Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -654,6 +967,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Stage: Stage-0 Fetch Operator @@ -693,13 +1021,15 @@ order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 2001-01-01 2001-06-01 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01') PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01') POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -720,6 +1050,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -756,6 +1087,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -771,6 +1108,7 @@ POSTHOOK: query: SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 2001-01-01 PREHOOK: query: drop table vector_date_1 PREHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index 4f1b509..f4863b7 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -32,6 +32,17 @@ POSTHOOK: Output: default@decimal_1 POSTHOOK: Lineage: decimal_1.t EXPRESSION [] POSTHOOK: Lineage: decimal_1.u EXPRESSION [] POSTHOOK: Lineage: decimal_1.v EXPRESSION [] +PREHOOK: query: insert into decimal_1 values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: insert into decimal_1 values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_1 +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +POSTHOOK: Lineage: decimal_1.u EXPRESSION [] +POSTHOOK: Lineage: decimal_1.v EXPRESSION [] PREHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_1 order by t PREHOOK: type: QUERY @@ -58,7 +69,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -70,7 +81,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToBoolean(col 0:decimal(4,2)) -> 4:boolean - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -80,7 +91,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -121,13 +132,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -147,6 +158,7 @@ POSTHOOK: query: select cast(t as boolean) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL true PREHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_1 order by t @@ -174,7 +186,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -186,7 +198,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:tinyint - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -196,7 +208,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -237,13 +249,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data 
size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -263,6 +275,7 @@ POSTHOOK: query: select cast(t as tinyint) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17 PREHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_1 order by t @@ -290,7 +303,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -302,7 +315,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:smallint - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + @@ -312,7 +325,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -353,13 +366,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -379,6 +392,7 @@ POSTHOOK: query: select cast(t as smallint) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17 PREHOOK: query: explain vectorization detail select cast(t as int) from decimal_1 order by t @@ -406,7 +420,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -418,7 +432,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:int - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: int) sort order: + @@ -428,7 +442,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -469,13 +483,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -495,6 +509,7 @@ POSTHOOK: query: select cast(t as int) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17 PREHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_1 order by t @@ -522,7 +537,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -534,7 +549,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:bigint - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -544,7 +559,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -585,13 +600,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -611,6 +626,7 @@ POSTHOOK: query: select cast(t as bigint) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17 PREHOOK: query: explain vectorization detail select cast(t as float) from decimal_1 order by t @@ -638,7 +654,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -650,7 +666,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToDouble(col 0:decimal(4,2)) -> 4:float - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + @@ -660,7 +676,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -701,13 +717,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -727,6 +743,7 @@ POSTHOOK: query: select cast(t as float) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17.29 PREHOOK: query: explain vectorization detail select cast(t as double) from decimal_1 order by t @@ -754,7 +771,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -766,7 +783,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToDouble(col 0:decimal(4,2)) -> 4:double - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -776,7 +793,7 @@ STAGE PLANS: 
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -817,13 +834,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -843,6 +860,7 @@ POSTHOOK: query: select cast(t as double) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17.29 PREHOOK: query: explain vectorization detail select cast(t as string) from decimal_1 order by t @@ -870,7 +888,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -882,7 +900,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToString(col 0:decimal(4,2)) -> 4:string - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -892,7 +910,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -933,13 +951,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -959,6 +977,7 @@ POSTHOOK: query: select cast(t as string) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17.29 PREHOOK: query: explain vectorization detail select cast(t as timestamp) from decimal_1 order by t @@ -986,7 +1005,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -998,7 +1017,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToTimestamp(col 0:decimal(4,2)) -> 4:timestamp - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + @@ -1008,7 +1027,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1049,13 +1068,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1075,6 +1094,7 @@ POSTHOOK: query: select cast(t as timestamp) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 1969-12-31 16:00:17.29 PREHOOK: query: drop table decimal_1 PREHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index 32e2088..0a72b3f 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -20,6 +20,18 @@ POSTHOOK: Lineage: decimal_vgby.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_vgby.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into decimal_vgby 
values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_vgby +POSTHOOK: query: insert into decimal_vgby values (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_vgby +POSTHOOK: Lineage: decimal_vgby.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby.cdouble EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby.cint EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), @@ -56,7 +68,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct] @@ -67,7 +79,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() Group By Vectorization: @@ -81,7 +93,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -92,7 +104,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -140,14 +152,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 6144 Data size: 1330950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterLongColGreaterLongScalar(col 9:bigint, val 1) predicate: (_col9 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE 
Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -155,13 +167,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -235,7 +247,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct] @@ -246,7 +258,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() Group By Vectorization: @@ -260,7 +272,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -271,7 +283,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) 
Execution mode: vectorized, llap LLAP IO: all inputs @@ -319,14 +331,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 6144 Data size: 1330950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterLongColGreaterLongScalar(col 15:bigint, val 1) predicate: (_col15 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -334,13 +346,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -400,6 +412,18 @@ POSTHOOK: Lineage: decimal_vgby_small.cdecimal1 EXPRESSION [(alltypesorc)alltype POSTHOOK: Lineage: decimal_vgby_small.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby_small.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby_small.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into decimal_vgby_small values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_vgby_small +POSTHOOK: query: insert into decimal_vgby_small values (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_vgby_small +POSTHOOK: Lineage: decimal_vgby_small.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby_small.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby_small.cdouble EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby_small.cint EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), @@ -436,7 +460,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby_small - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5), 2:cdecimal2:decimal(16,0), 3:cint:int, 4:ROW__ID:struct] @@ -447,7 +471,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() Group By Vectorization: @@ -461,7 +485,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -472,7 +496,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)), _col9 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs @@ -521,14 +545,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 6144 Data size: 1330950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterLongColGreaterLongScalar(col 9:bigint, val 1) predicate: (_col9 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -536,13 +560,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 443650 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -580,6 +604,25 @@ POSTHOOK: Input: default@decimal_vgby_small 6981 2 -515.62107 -515.62107 -1031.24214 3 6984454 -618 6983218 762 1 1531.21941 1531.21941 1531.21941 2 6984454 1834 6986288 NULL 3072 9318.43514 -4298.15135 5018444.11392 3072 11161 -5148 6010880 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +-18663521580 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), @@ -616,7 +659,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby_small - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5), 2:cdecimal2:decimal(16,0), 3:cint:int, 4:ROW__ID:struct] @@ -627,7 +670,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() Group By Vectorization: @@ -641,7 +684,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -652,7 +695,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - Statistics: Num rows: 12288 Data size: 2661900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 
(type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs @@ -701,14 +744,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 6144 Data size: 1330950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterLongColGreaterLongScalar(col 15:bigint, val 1) predicate: (_col15 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -716,13 +759,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 443650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -760,3 +803,22 @@ POSTHOOK: Input: default@decimal_vgby_small 6981 2 -515.62107 -515.62107 -1031.24214 -515.621070000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175 762 1 1531.21941 1531.21941 1531.21941 1531.219410000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881 NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.483083909642 5696.410309489072 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), 
STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +91757235680 diff --git ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out index d63eeb7..7dbe584 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out @@ -1,13 +1,30 @@ -PREHOOK: query: CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@alltypesorc +PREHOOK: query: CREATE TABLE decimal_test (cdouble double,cdecimal1 DECIMAL(20,10), cdecimal2 DECIMAL(23,14)) STORED AS ORC +PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@decimal_test -POSTHOOK: query: CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@alltypesorc +POSTHOOK: query: CREATE TABLE decimal_test (cdouble double,cdecimal1 DECIMAL(20,10), cdecimal2 DECIMAL(23,14)) STORED AS ORC +POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@decimal_test +PREHOOK: query: insert into decimal_test values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_test +POSTHOOK: query: insert into decimal_test values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_test +POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdouble EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE decimal_test SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@decimal_test +POSTHOOK: query: INSERT INTO TABLE decimal_test SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@decimal_test POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] @@ -41,7 +58,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_test - Statistics: Num rows: 12288 Data size: 2708600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2708832 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true 
vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:ROW__ID:struct] @@ -158,6 +175,19 @@ POSTHOOK: Input: default@decimal_test 1895.51268191268460 -1203.53347193346920 0.8371969190171 262050.87567567649292835 2.4972972973 862 1033 NULL 862 true 1033.0153846153846 862.4973 1033.0153846153846 1969-12-31 16:14:22.497297297 1909.95218295221550 -1212.70166320163100 0.8371797936946 266058.54729730725574014 9.0675675676 869 1040 NULL 869 true 1040.8846153846155 869.06757 1040.8846153846155 1969-12-31 16:14:29.067567567 1913.89022869026920 -1215.20207900203840 0.8371751679996 267156.82702703945592392 0.8594594595 870 1043 NULL 870 true 1043.0307692307692 870.85944 1043.0307692307692 1969-12-31 16:14:30.859459459 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_test +#### A masked pattern was here #### +-1300490595129 PREHOOK: query: CREATE TABLE decimal_test_small STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(10,3)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(7,2)) AS cdecimal2 FROM alltypesorc PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@alltypesorc @@ -318,3 +348,16 @@ POSTHOOK: Input: default@decimal_test_small 1895.517 -1203.543 0.83719289075 262051.956361764 2.497 862 1033 NULL 862 true 1033.02 862.497 1033.02 1969-12-31 16:14:22.497 1909.948 -1212.692 0.83718392130 266057.499543968 9.068 869 1040 NULL 869 true 1040.88 869.068 1040.88 1969-12-31 16:14:29.068 1913.889 -1215.201 0.83717534491 267156.488691411 0.859 870 1043 NULL 870 true 1043.03 870.859 1043.03 1969-12-31 16:14:30.859 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, 
CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test_small WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_test_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test_small WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_test_small +#### A masked pattern was here #### +774841630076 diff --git ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out index 270b634..e9023a4 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_math_funcs.q.out @@ -12,6 +12,18 @@ POSTHOOK: Lineage: decimal_test.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSc POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +PREHOOK: query: insert into decimal_test values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_test +POSTHOOK: query: insert into decimal_test values (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_test +POSTHOOK: Lineage: decimal_test.cbigint EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdouble EXPRESSION [] PREHOOK: query: explain vectorization detail select cdecimal1 @@ -103,7 +115,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_test - Statistics: Num rows: 12288 Data size: 1401000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 1401120 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cbigint:bigint, 1:cdouble:double, 2:cdecimal1:decimal(20,10), 3:cdecimal2:decimal(23,14), 4:ROW__ID:struct] diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index 56248d1..a306a17 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -48,6 +48,16 @@ POSTHOOK: Input: default@decimal_udf_txt 
POSTHOOK: Output: default@decimal_udf POSTHOOK: Lineage: decimal_udf.key SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] POSTHOOK: Lineage: decimal_udf.value SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:value, type:int, comment:null), ] +PREHOOK: query: insert into DECIMAL_UDF values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_udf +POSTHOOK: query: insert into DECIMAL_UDF values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_udf +POSTHOOK: Lineage: decimal_udf.key EXPRESSION [] +POSTHOOK: Lineage: decimal_udf.value EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key + key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -71,7 +81,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -83,13 +93,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColAddDecimalColumn(col 0:decimal(20,10), col 0:decimal(20,10)) -> 3:decimal(21,10) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -126,44 +136,45 @@ POSTHOOK: query: SELECT key + key FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.6000000000 +-0.6600000000 +-0.6660000000 +-2.2400000000 +-2.2400000000 +-2.2440000000 +-2469135780.2469135780 +-2510.9800000000 -8800.0000000000 -NULL 0.0000000000 0.0000000000 -200.0000000000 -20.0000000000 -2.0000000000 -0.2000000000 -0.0200000000 -400.0000000000 -40.0000000000 -4.0000000000 0.0000000000 -0.4000000000 +0.0200000000 0.0400000000 +0.2000000000 +0.4000000000 0.6000000000 0.6600000000 0.6660000000 --0.6000000000 --0.6600000000 --0.6660000000 2.0000000000 -4.0000000000 -6.2800000000 --2.2400000000 --2.2400000000 --2.2440000000 +2.0000000000 +2.0000000000 2.2400000000 2.2440000000 +20.0000000000 +200.0000000000 +2469135780.2469135600 248.0000000000 250.4000000000 --2510.9800000000 +4.0000000000 +4.0000000000 +40.0000000000 +400.0000000000 6.2800000000 6.2800000000 6.2800000000 -2.0000000000 --2469135780.2469135780 -2469135780.2469135600 +6.2800000000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key + value FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -187,7 +198,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ 
-199,13 +210,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DecimalColAddDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(21,10) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -242,44 +253,45 @@ POSTHOOK: query: SELECT key + value FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.3000000000 +-0.3300000000 +-0.3330000000 +-12.1220000000 +-2.1200000000 +-2.1200000000 +-2469135780.1234567890 +-2510.4900000000 0.0000000000 -NULL 0.0000000000 0.0000000000 -200.0000000000 -20.0000000000 -2.0000000000 -0.1000000000 -0.0100000000 -400.0000000000 -40.0000000000 -4.0000000000 0.0000000000 -0.2000000000 +0.0100000000 0.0200000000 +0.1000000000 +0.2000000000 0.3000000000 0.3300000000 0.3330000000 --0.3000000000 --0.3300000000 --0.3330000000 2.0000000000 -4.0000000000 -6.1400000000 --2.1200000000 --2.1200000000 --12.1220000000 +2.0000000000 +2.0000000000 2.1200000000 2.1220000000 +20.0000000000 +200.0000000000 +2469135780.1234567800 248.0000000000 250.2000000000 --2510.4900000000 +4.0000000000 +4.0000000000 +40.0000000000 +400.0000000000 +6.1400000000 6.1400000000 6.1400000000 7.1400000000 -2.0000000000 --2469135780.1234567890 -2469135780.1234567800 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key + (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -303,7 +315,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -315,13 +327,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColAddDoubleColumn(col 3:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColDivideDoubleScalar(col 4:double, val 2.0)(children: CastLongToDouble(col 1:int) -> 4:double) -> 5:double) -> 4:double - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -358,44 +370,45 @@ POSTHOOK: query: SELECT key + (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.3 +-0.33 +-0.333 +-1.62 +-1.62 
+-1.8518518351234567E9 +-1882.99 -2200.0 -NULL +-6.622 0.0 0.0 -150.0 -15.0 -1.5 -0.1 -0.01 -300.0 -30.0 -3.0 0.0 -0.2 +0.01 0.02 +0.1 +0.2 0.3 0.33 0.333 --0.3 --0.33 --0.333 1.5 -3.0 -4.640000000000001 --1.62 --1.62 --6.622 +1.5 +1.5 1.62 1.622 +1.8518518351234567E9 +15.0 +150.0 186.0 187.7 --1882.99 +3.0 +3.0 +30.0 +300.0 +4.640000000000001 4.640000000000001 4.640000000000001 5.140000000000001 -1.5 --1.8518518351234567E9 -1.8518518351234567E9 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key + '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -419,7 +432,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -431,13 +444,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColAddDoubleScalar(col 3:double, val 1.0)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double) -> 4:double - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -474,44 +487,45 @@ POSTHOOK: query: SELECT key + '1.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.1200000000000001 +-0.1200000000000001 +-0.12200000000000011 +-1.2345678891234567E9 +-1254.49 -4399.0 -NULL +0.667 +0.6699999999999999 +0.7 1.0 1.0 -101.0 -11.0 -2.0 -1.1 -1.01 -201.0 -21.0 -3.0 1.0 -1.2 +1.01 1.02 +1.1 +1.2 +1.2345678911234567E9 1.3 1.33 1.333 -0.7 -0.6699999999999999 -0.667 +101.0 +11.0 +125.0 +126.2 +2.0 +2.0 2.0 -3.0 -4.140000000000001 --0.1200000000000001 --0.1200000000000001 --0.12200000000000011 2.12 2.122 -125.0 -126.2 --1254.49 +201.0 +21.0 +3.0 +3.0 4.140000000000001 4.140000000000001 4.140000000000001 -2.0 --1.2345678891234567E9 -1.2345678911234567E9 +4.140000000000001 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key - key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -535,7 +549,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -547,13 +561,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColSubtractDecimalColumn(col 0:decimal(20,10), col 0:decimal(20,10)) -> 3:decimal(21,10) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 
4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -591,7 +605,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### 0.0000000000 -NULL 0.0000000000 0.0000000000 0.0000000000 @@ -628,6 +641,8 @@ NULL 0.0000000000 0.0000000000 0.0000000000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key - value FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -651,7 +666,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -663,13 +678,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DecimalColSubtractDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(21,10) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -706,44 +721,45 @@ POSTHOOK: query: SELECT key - value FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.1200000000 +-0.1200000000 +-0.1234567890 +-0.3000000000 +-0.3300000000 +-0.3330000000 +-0.4900000000 +-0.8600000000 -8800.0000000000 -NULL 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 -0.1000000000 -0.0100000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 -0.2000000000 -0.0200000000 -0.3000000000 -0.3300000000 -0.3330000000 --0.3000000000 --0.3300000000 --0.3330000000 0.0000000000 0.0000000000 -0.1400000000 --0.1200000000 --0.1200000000 -9.8780000000 +0.0000000000 +0.0000000000 +0.0100000000 +0.0200000000 +0.1000000000 0.1200000000 0.1220000000 -0.0000000000 -0.2000000000 --0.4900000000 +0.1234567800 0.1400000000 0.1400000000 --0.8600000000 -0.0000000000 --0.1234567890 -0.1234567800 +0.1400000000 +0.2000000000 +0.2000000000 +0.3000000000 +0.3300000000 +0.3330000000 +9.8780000000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key - (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -767,7 +783,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -779,13 +795,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColSubtractDoubleColumn(col 3:double, col 5:double)(children: CastDecimalToDouble(col 
0:decimal(20,10)) -> 3:double, DoubleColDivideDoubleScalar(col 4:double, val 2.0)(children: CastLongToDouble(col 1:int) -> 4:double) -> 5:double) -> 4:double - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -822,44 +838,45 @@ POSTHOOK: query: SELECT key - (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.3 +-0.33 +-0.333 +-0.6200000000000001 +-0.6200000000000001 +-6.172839451234567E8 +-627.99 -6600.0 -NULL 0.0 0.0 -50.0 -5.0 -0.5 -0.1 -0.01 -100.0 -10.0 -1.0 0.0 -0.2 +0.01 0.02 +0.1 +0.2 0.3 0.33 0.333 --0.3 --0.33 --0.333 0.5 -1.0 -1.6400000000000001 --0.6200000000000001 --0.6200000000000001 -4.378 +0.5 +0.5 0.6200000000000001 0.6220000000000001 -62.0 -62.7 --627.99 +1.0 +1.0 +1.1400000000000001 1.6400000000000001 1.6400000000000001 -1.1400000000000001 -0.5 --6.172839451234567E8 +1.6400000000000001 +10.0 +100.0 +4.378 +5.0 +50.0 6.172839451234567E8 +62.0 +62.7 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key - '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -883,7 +900,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -895,13 +912,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColSubtractDoubleScalar(col 3:double, val 1.0)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double) -> 4:double - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -938,44 +955,45 @@ POSTHOOK: query: SELECT key - '1.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### --4401.0 -NULL --1.0 --1.0 -99.0 -9.0 -0.0 +-0.667 +-0.6699999999999999 +-0.7 +-0.8 -0.9 +-0.98 -0.99 -199.0 -19.0 -1.0 -1.0 --0.8 --0.98 --0.7 --0.6699999999999999 --0.667 +-1.0 +-1.0 +-1.2345678911234567E9 -1.3 -1.33 -1.333 -0.0 -1.0 -2.14 +-1256.49 -2.12 -2.12 -2.122 +-4401.0 +0.0 +0.0 +0.0 0.1200000000000001 0.12200000000000011 +1.0 +1.0 +1.2345678891234567E9 123.0 124.2 --1256.49 +19.0 +199.0 2.14 2.14 2.14 -0.0 --1.2345678911234567E9 -1.2345678891234567E9 +2.14 +9.0 +99.0 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key * 
key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -999,7 +1017,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1011,13 +1029,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColMultiplyDecimalColumn(col 0:decimal(20,10), col 0:decimal(20,10)) -> 3:decimal(38,17) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1054,44 +1072,45 @@ POSTHOOK: query: SELECT key * key FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -19360000.00000000000000000 -NULL 0.00000000000000000 0.00000000000000000 -10000.00000000000000000 -100.00000000000000000 -1.00000000000000000 -0.01000000000000000 -0.00010000000000000 -40000.00000000000000000 -400.00000000000000000 -4.00000000000000000 0.00000000000000000 -0.04000000000000000 +0.00010000000000000 0.00040000000000000 +0.01000000000000000 +0.04000000000000000 0.09000000000000000 -0.10890000000000000 -0.11088900000000000 0.09000000000000000 0.10890000000000000 +0.10890000000000000 +0.11088900000000000 0.11088900000000000 1.00000000000000000 -4.00000000000000000 -9.85960000000000000 +1.00000000000000000 +1.00000000000000000 1.25440000000000000 1.25440000000000000 -1.25888400000000000 1.25440000000000000 1.25888400000000000 +1.25888400000000000 +100.00000000000000000 +10000.00000000000000000 +1524157875323883652.79682997652796840 +1524157875323883675.01905199875019052 15376.00000000000000000 15675.04000000000000000 1576255.14010000000000000 +19360000.00000000000000000 +4.00000000000000000 +4.00000000000000000 +400.00000000000000000 +40000.00000000000000000 9.85960000000000000 9.85960000000000000 9.85960000000000000 -1.00000000000000000 -1524157875323883675.01905199875019052 -1524157875323883652.79682997652796840 +9.85960000000000000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key, value FROM DECIMAL_UDF where key * value > 0 PREHOOK: type: QUERY @@ -1115,7 +1134,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1125,7 +1144,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColGreaterDecimalScalar(col 4:decimal(31,10), val 0)(children: DecimalColMultiplyDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(31,10)) predicate: ((key * CAST( value AS decimal(10,0))) > 0) (type: boolean) - 
Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: _col0, _col1 @@ -1133,13 +1152,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1176,29 +1195,29 @@ POSTHOOK: query: SELECT key, value FROM DECIMAL_UDF where key * value > 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -100.0000000000 100 -10.0000000000 10 -1.0000000000 1 -200.0000000000 200 -20.0000000000 20 -2.0000000000 2 -1.0000000000 1 -2.0000000000 2 -3.1400000000 3 -1.1200000000 -1 -1.1200000000 -1 -1.1220000000 -11 +-1234567890.1234567890 -1234567890 +-1255.4900000000 -1255 +1.0000000000 1 +1.0000000000 1 +1.0000000000 1 1.1200000000 1 1.1220000000 1 +10.0000000000 10 +100.0000000000 100 +1234567890.1234567800 1234567890 124.0000000000 124 125.2000000000 125 --1255.4900000000 -1255 +2.0000000000 2 +2.0000000000 2 +20.0000000000 20 +200.0000000000 200 +3.1400000000 3 3.1400000000 3 3.1400000000 3 3.1400000000 4 -1.0000000000 1 --1234567890.1234567890 -1234567890 -1234567890.1234567800 1234567890 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key * value FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1222,7 +1241,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1234,13 +1253,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DecimalColMultiplyDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(31,10) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1278,17 +1297,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -19360000.0000000000 -NULL 0.0000000000 0.0000000000 -10000.0000000000 -100.0000000000 -1.0000000000 0.0000000000 0.0000000000 -40000.0000000000 -400.0000000000 -4.0000000000 0.0000000000 
0.0000000000 0.0000000000 @@ -1299,22 +1311,30 @@ NULL 0.0000000000 0.0000000000 1.0000000000 -4.0000000000 -9.4200000000 +1.0000000000 +1.0000000000 1.1200000000 1.1200000000 -12.3420000000 1.1200000000 1.1220000000 +100.0000000000 +10000.0000000000 +12.3420000000 +12.5600000000 +1524157875171467876.3907942000 +1524157875171467887.5019052100 15376.0000000000 15650.0000000000 1575639.9500000000 +4.0000000000 +4.0000000000 +400.0000000000 +40000.0000000000 9.4200000000 9.4200000000 -12.5600000000 -1.0000000000 -1524157875171467887.5019052100 -1524157875171467876.3907942000 +9.4200000000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key * (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1338,7 +1358,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1350,13 +1370,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColMultiplyDoubleColumn(col 3:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColDivideDoubleScalar(col 4:double, val 2.0)(children: CastLongToDouble(col 1:int) -> 4:double) -> 5:double) -> 4:double - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1393,44 +1413,45 @@ POSTHOOK: query: SELECT key * (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.0 +-0.0 +-0.0 -9680000.0 -NULL 0.0 0.0 -5000.0 -50.0 -0.5 0.0 0.0 -20000.0 -200.0 -2.0 0.0 0.0 0.0 0.0 0.0 0.0 --0.0 --0.0 --0.0 0.5 -2.0 -4.71 +0.5 +0.5 0.56 0.56 -6.171 0.56 0.561 -7688.0 -7825.0 -787819.975 +2.0 +2.0 +200.0 +20000.0 +4.71 4.71 4.71 +50.0 +5000.0 +6.171 6.28 -0.5 7.6207893758573389E17 7.6207893758573389E17 +7688.0 +7825.0 +787819.975 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key * '2.0' FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1454,7 +1475,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1466,13 +1487,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColMultiplyDoubleScalar(col 3:double, val 2.0)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double) -> 4:double - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: 
VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1509,49 +1530,50 @@ POSTHOOK: query: SELECT key * '2.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.6 +-0.66 +-0.666 +-2.24 +-2.24 +-2.244 +-2.4691357802469134E9 +-2510.98 -8800.0 -NULL 0.0 0.0 -200.0 -20.0 -2.0 -0.2 -0.02 -400.0 -40.0 -4.0 0.0 -0.4 +0.02 0.04 +0.2 +0.4 0.6 0.66 0.666 --0.6 --0.66 --0.666 2.0 -4.0 -6.28 --2.24 --2.24 --2.244 +2.0 +2.0 2.24 2.244 +2.4691357802469134E9 +20.0 +200.0 248.0 250.4 --2510.98 +4.0 +4.0 +40.0 +400.0 6.28 6.28 6.28 -2.0 --2.4691357802469134E9 -2.4691357802469134E9 +6.28 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT key / 0 FROM DECIMAL_UDF limit 1 +SELECT key / 0 FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT key / 0 FROM DECIMAL_UDF limit 1 +SELECT key / 0 FROM DECIMAL_UDF POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -1570,7 +1592,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1582,23 +1604,17 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColDivideDecimalScalar(col 0:decimal(20,10), val 0) -> 3:decimal(22,12) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1619,19 +1635,57 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator - limit: 1 + limit: -1 Processor Tree: ListSink -PREHOOK: query: SELECT key / 0 FROM DECIMAL_UDF limit 1 +PREHOOK: query: SELECT key / 0 FROM DECIMAL_UDF PREHOOK: type: QUERY PREHOOK: Input: default@decimal_udf #### A masked pattern was here #### -POSTHOOK: query: SELECT key / 0 FROM DECIMAL_UDF limit 1 +POSTHOOK: query: SELECT key / 0 FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: 
Input: default@decimal_udf #### A masked pattern was here #### NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 PREHOOK: type: QUERY @@ -1655,7 +1709,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1665,7 +1719,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColNotEqualDecimalScalar(col 0:decimal(20,10), val 0) predicate: (key <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / key) (type: decimal(38,18)) outputColumnNames: _col0 @@ -1674,13 +1728,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColDivideDecimalColumn(col 0:decimal(20,10), col 0:decimal(20,10)) -> 3:decimal(38,18) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1774,7 +1828,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1784,7 +1838,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColNotEqualLongScalar(col 1:int, val 0) predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) outputColumnNames: _col0 @@ -1793,13 +1847,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DecimalColDivideDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(31,21) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 
Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1837,6 +1891,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -1.000000000000000000000 +0.102000000000000000000 +0.785000000000000000000 +1.000000000000000000000 +1.000000000000000000000 1.000000000000000000000 1.000000000000000000000 1.000000000000000000000 @@ -1845,21 +1903,17 @@ POSTHOOK: Input: default@decimal_udf 1.000000000000000000000 1.000000000000000000000 1.000000000000000000000 +1.000000000099999992710 +1.000000000100000000000 +1.000390438247011952191 +1.001600000000000000000 +1.046666666666666666667 +1.046666666666666666667 1.046666666666666666667 1.120000000000000000000 1.120000000000000000000 -0.102000000000000000000 1.120000000000000000000 1.122000000000000000000 -1.000000000000000000000 -1.001600000000000000000 -1.000390438247011952191 -1.046666666666666666667 -1.046666666666666666667 -0.785000000000000000000 -1.000000000000000000000 -1.000000000100000000000 -1.000000000099999992710 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 PREHOOK: type: QUERY @@ -1883,7 +1937,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1893,7 +1947,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColNotEqualLongScalar(col 1:int, val 0) predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) / (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 @@ -1902,13 +1956,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColDivideDoubleColumn(col 3:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColDivideDoubleScalar(col 4:double, val 2.0)(children: CastLongToDouble(col 1:int) -> 4:double) -> 5:double) -> 4:double - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1946,6 +2000,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -2.0 +0.20400000000000001 +1.57 +2.0 +2.0 2.0 2.0 2.0 @@ -1954,21 +2012,17 @@ POSTHOOK: Input: default@decimal_udf 2.0 2.0 2.0 +2.0000000002 +2.0000000002 +2.000780876494024 +2.0032 +2.0933333333333333 +2.0933333333333333 2.0933333333333333 2.24 2.24 -0.20400000000000001 2.24 2.244 -2.0 -2.0032 -2.000780876494024 -2.0933333333333333 
-2.0933333333333333 -1.57 -2.0 -2.0000000002 -2.0000000002 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT 1 + (key / '2.0') FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -1992,7 +2046,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -2004,13 +2058,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DoubleScalarAddDoubleColumn(val 1.0, col 4:double)(children: DoubleColDivideDoubleScalar(col 3:double, val 2.0)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double) -> 4:double) -> 3:double - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2048,43 +2102,44 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -2199.0 -NULL +-6.172839440617284E8 +-626.745 +0.43899999999999995 +0.43999999999999995 +0.43999999999999995 +0.8335 +0.835 +0.85 1.0 1.0 -51.0 -6.0 -1.5 -1.05 -1.005 -101.0 -11.0 -2.0 1.0 -1.1 +1.005 1.01 +1.05 +1.1 1.15 1.165 1.1665 -0.85 -0.835 -0.8335 1.5 -2.0 -2.5700000000000003 -0.43999999999999995 -0.43999999999999995 -0.43899999999999995 +1.5 +1.5 1.56 1.561 -63.0 -63.6 --626.745 +101.0 +11.0 +2.0 +2.0 2.5700000000000003 2.5700000000000003 2.5700000000000003 -1.5 --6.172839440617284E8 +2.5700000000000003 +51.0 +6.0 6.172839460617284E8 +63.0 +63.6 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT abs(key) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2108,7 +2163,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -2120,13 +2175,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: FuncAbsDecimalToDecimal(col 0:decimal(20,10)) -> 3:decimal(20,10) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2163,44 +2218,45 @@ POSTHOOK: query: SELECT abs(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was 
here #### -4400.0000000000 -NULL 0.0000000000 0.0000000000 -100.0000000000 -10.0000000000 -1.0000000000 -0.1000000000 -0.0100000000 -200.0000000000 -20.0000000000 -2.0000000000 0.0000000000 -0.2000000000 +0.0100000000 0.0200000000 +0.1000000000 +0.2000000000 0.3000000000 -0.3300000000 -0.3330000000 0.3000000000 0.3300000000 +0.3300000000 +0.3330000000 0.3330000000 1.0000000000 -2.0000000000 -3.1400000000 +1.0000000000 +1.0000000000 1.1200000000 1.1200000000 -1.1220000000 1.1200000000 1.1220000000 +1.1220000000 +10.0000000000 +100.0000000000 +1234567890.1234567800 +1234567890.1234567890 124.0000000000 125.2000000000 1255.4900000000 +2.0000000000 +2.0000000000 +20.0000000000 +200.0000000000 3.1400000000 3.1400000000 3.1400000000 -1.0000000000 -1234567890.1234567890 -1234567890.1234567800 +3.1400000000 +4400.0000000000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value PREHOOK: type: QUERY @@ -2228,7 +2284,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -2239,7 +2295,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(key), count(key), avg(key) Group By Vectorization: @@ -2253,7 +2309,7 @@ STAGE PLANS: keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 17 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18 Data size: 7416 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -2264,7 +2320,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2, 3] - Statistics: Num rows: 17 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18 Data size: 7416 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2382,23 +2438,24 @@ POSTHOOK: query: SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DE POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-1 -1.120000000000000000 -1.12000000000000 -2.2400000000 +-11 -1.122000000000000000 -1.12200000000000 -1.1220000000 -1234567890 -1234567890.123456789000000000 -1234567890.12345678900000 -1234567890.1234567890 -1255 -1255.490000000000000000 -1255.49000000000000 -1255.4900000000 --11 -1.122000000000000000 -1.12200000000000 -1.1220000000 --1 -1.120000000000000000 -1.12000000000000 -2.2400000000 0 0.025384615384615385 0.02538461538462 0.3300000000 1 1.048400000000000000 1.04840000000000 5.2420000000 -2 2.000000000000000000 2.00000000000000 4.0000000000 -3 
3.140000000000000000 3.14000000000000 9.4200000000 -4 3.140000000000000000 3.14000000000000 3.1400000000 10 10.000000000000000000 10.00000000000000 10.0000000000 -20 20.000000000000000000 20.00000000000000 20.0000000000 100 100.000000000000000000 100.00000000000000 100.0000000000 +1234567890 1234567890.123456780000000000 1234567890.12345678000000 1234567890.1234567800 124 124.000000000000000000 124.00000000000000 124.0000000000 125 125.200000000000000000 125.20000000000000 125.2000000000 +2 2.000000000000000000 2.00000000000000 4.0000000000 +20 20.000000000000000000 20.00000000000000 20.0000000000 200 200.000000000000000000 200.00000000000000 200.0000000000 +3 3.140000000000000000 3.14000000000000 9.4200000000 +4 3.140000000000000000 3.14000000000000 3.1400000000 4400 -4400.000000000000000000 -4400.00000000000000 -4400.0000000000 -1234567890 1234567890.123456780000000000 1234567890.12345678000000 1234567890.1234567800 +NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT -key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2422,7 +2479,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -2434,13 +2491,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: FuncNegateDecimalToDecimal(col 0:decimal(20,10)) -> 3:decimal(20,10) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2477,44 +2534,45 @@ POSTHOOK: query: SELECT -key FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -4400.0000000000 -NULL -0.0000000000 -0.0000000000 --100.0000000000 --10.0000000000 --1.0000000000 --0.1000000000 -0.0100000000 --200.0000000000 --20.0000000000 --2.0000000000 -0.0000000000 --0.2000000000 -0.0200000000 +-0.1000000000 +-0.2000000000 -0.3000000000 -0.3300000000 -0.3330000000 -0.3000000000 -0.3300000000 -0.3330000000 -1.0000000000 --2.0000000000 --3.1400000000 -1.1200000000 -1.1200000000 -1.1220000000 +-1.0000000000 +-1.0000000000 -1.1200000000 -1.1220000000 +-10.0000000000 +-100.0000000000 +-1234567890.1234567800 -124.0000000000 -125.2000000000 -1255.4900000000 +-2.0000000000 +-2.0000000000 +-20.0000000000 +-200.0000000000 -3.1400000000 -3.1400000000 -3.1400000000 --1.0000000000 +-3.1400000000 +0.0000000000 +0.0000000000 +0.0000000000 +0.3000000000 +0.3300000000 +0.3330000000 +1.1200000000 +1.1200000000 +1.1220000000 1234567890.1234567890 --1234567890.1234567800 +1255.4900000000 +4400.0000000000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT +key FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2548,44 +2606,45 @@ POSTHOOK: query: SELECT +key FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was 
here #### +-0.3000000000 +-0.3300000000 +-0.3330000000 +-1.1200000000 +-1.1200000000 +-1.1220000000 +-1234567890.1234567890 +-1255.4900000000 -4400.0000000000 -NULL 0.0000000000 0.0000000000 -100.0000000000 -10.0000000000 -1.0000000000 -0.1000000000 +0.0000000000 0.0100000000 -200.0000000000 -20.0000000000 -2.0000000000 -0.0000000000 -0.2000000000 0.0200000000 +0.1000000000 +0.2000000000 0.3000000000 0.3300000000 0.3330000000 --0.3000000000 --0.3300000000 --0.3330000000 1.0000000000 -2.0000000000 -3.1400000000 --1.1200000000 --1.1200000000 --1.1220000000 +1.0000000000 +1.0000000000 1.1200000000 1.1220000000 +10.0000000000 +100.0000000000 +1234567890.1234567800 124.0000000000 125.2000000000 --1255.4900000000 +2.0000000000 +2.0000000000 +20.0000000000 +200.0000000000 3.1400000000 3.1400000000 3.1400000000 -1.0000000000 --1234567890.1234567890 -1234567890.1234567800 +3.1400000000 +NULL +NULL PREHOOK: query: EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF @@ -2603,14 +2662,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ceil(key) (type: decimal(11,0)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2632,44 +2691,45 @@ POSTHOOK: query: SELECT CEIL(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-1 +-1 +-1 +-1234567890 +-1255 -4400 -NULL 0 0 -100 -10 +0 +0 +0 +0 +1 1 1 1 -200 -20 -2 -0 1 1 1 1 1 -0 -0 -0 1 +10 +100 +1234567891 +124 +126 2 -4 --1 --1 --1 2 2 -124 -126 --1255 +2 +20 +200 4 4 4 -1 --1234567890 -1234567891 +4 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT FLOOR(key) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2693,7 +2753,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -2705,13 +2765,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: FuncFloorDecimalToDecimal(col 0:decimal(20,10)) -> 3:decimal(11,0) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2748,44 +2808,45 @@ POSTHOOK: query: SELECT FLOOR(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-1 +-1 +-1 +-1234567891 +-1256 +-2 +-2 +-2 -4400 -NULL 0 0 -100 -10 -1 0 0 -200 -20 -2 0 0 0 0 0 0 --1 --1 --1 1 -2 -3 --2 --2 --2 1 1 +1 +1 +10 +100 +1234567890 124 125 --1256 +2 +2 +20 +200 3 3 3 -1 --1234567891 -1234567890 +3 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ROUND(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2809,7 +2870,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -2821,13 +2882,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(20,10), decimalPlaces 2) -> 3:decimal(13,2) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2864,44 +2925,45 @@ POSTHOOK: query: SELECT ROUND(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.30 +-0.33 +-0.33 +-1.12 +-1.12 +-1.12 +-1234567890.12 +-1255.49 -4400.00 -NULL 0.00 0.00 -100.00 -10.00 -1.00 -0.10 -0.01 -200.00 -20.00 -2.00 0.00 -0.20 +0.01 0.02 +0.10 +0.20 0.30 0.33 0.33 --0.30 --0.33 --0.33 1.00 -2.00 -3.14 --1.12 --1.12 --1.12 +1.00 +1.00 1.12 1.12 +10.00 +100.00 +1234567890.12 124.00 125.20 --1255.49 +2.00 +2.00 +20.00 +200.00 3.14 3.14 3.14 -1.00 --1234567890.12 -1234567890.12 +3.14 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -2925,7 +2987,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -2937,13 +2999,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: VectorUDFAdaptor(power(key, 2)) -> 3:double - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2980,44 +3042,45 @@ POSTHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -1.936E7 -NULL 0.0 0.0 -10000.0 -100.0 -1.0 -0.010000000000000002 -1.0E-4 -40000.0 -400.0 -4.0 0.0 +0.010000000000000002 0.04000000000000001 -4.0E-4 0.09 -0.10890000000000001 -0.11088900000000002 0.09 0.10890000000000001 +0.10890000000000001 +0.11088900000000002 0.11088900000000002 1.0 -4.0 -9.8596 +1.0 +1.0 +1.0E-4 1.2544000000000002 1.2544000000000002 -1.2588840000000003 1.2544000000000002 1.2588840000000003 +1.2588840000000003 +1.52415787532388352E18 +1.52415787532388352E18 +1.936E7 +100.0 +10000.0 15376.0 15675.04 1576255.1401 +4.0 +4.0 +4.0E-4 +400.0 +40000.0 9.8596 9.8596 9.8596 -1.0 -1.52415787532388352E18 -1.52415787532388352E18 +9.8596 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -3041,7 +3104,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -3053,13 +3116,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5] selectExpressions: DecimalColModuloDecimalColumn(col 3:decimal(21,10), col 4:decimal(22,12))(children: DecimalColAddDecimalScalar(col 0:decimal(20,10), val 1) -> 3:decimal(21,10), DecimalColDivideDecimalScalar(col 0:decimal(20,10), val 2) -> 4:decimal(22,12)) -> 5:decimal(22,12) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3096,44 +3159,45 @@ POSTHOOK: query: SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-0.120000000000 +-0.120000000000 +-0.122000000000 -2199.000000000000 -NULL -NULL -NULL -1.000000000000 -1.000000000000 +-617283944.061728394500 +-626.745000000000 0.000000000000 0.000000000000 0.000000000000 -1.000000000000 -1.000000000000 0.000000000000 -NULL 0.000000000000 0.000000000000 -0.100000000000 -0.010000000000 -0.001000000000 -0.100000000000 -0.010000000000 -0.001000000000 0.000000000000 0.000000000000 -1.000000000000 --0.120000000000 --0.120000000000 --0.122000000000 -0.440000000000 +0.000000000000 +0.001000000000 +0.001000000000 +0.010000000000 +0.010000000000 +0.100000000000 +0.100000000000 0.439000000000 +0.440000000000 +1.000000000000 +1.000000000000 1.000000000000 1.000000000000 --626.745000000000 1.000000000000 1.000000000000 1.000000000000 -0.000000000000 --617283944.061728394500 1.000000000000 +1.000000000000 +1.000000000000 +1.000000000000 +NULL +NULL +NULL +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY 
value PREHOOK: type: QUERY @@ -3160,7 +3224,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -3171,7 +3235,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev(key), variance(key) Group By Vectorization: @@ -3185,7 +3249,7 @@ STAGE PLANS: keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17 Data size: 2788 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -3196,7 +3260,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2] - Statistics: Num rows: 17 Data size: 2788 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -3270,23 +3334,24 @@ POSTHOOK: query: SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -4 0.0 0.0 +-1 0.0 0.0 +-11 0.0 0.0 -1234567890 0.0 0.0 +-1255 0.0 0.0 0 0.22561046704494161 0.050900082840236685 1 0.05928102563215321 0.0035142400000000066 -2 0.0 0.0 -3 0.0 0.0 -124 0.0 0.0 -200 0.0 0.0 -4400 0.0 0.0 -1234567890 0.0 0.0 10 0.0 0.0 +100 0.0 0.0 +1234567890 0.0 0.0 +124 0.0 0.0 125 0.0 0.0 --1255 0.0 0.0 --11 0.0 0.0 --1 0.0 0.0 +2 0.0 0.0 20 0.0 0.0 -100 0.0 0.0 +200 0.0 0.0 +3 0.0 0.0 +4 0.0 0.0 +4400 0.0 0.0 +NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY @@ -3313,7 +3378,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -3324,7 +3389,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev_samp(key), var_samp(key) Group By Vectorization: @@ -3338,7 +3403,7 @@ STAGE PLANS: keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17 Data size: 2788 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18 Data size: 2952 Basic stats: 
COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -3349,7 +3414,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2] - Statistics: Num rows: 17 Data size: 2788 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -3423,23 +3488,24 @@ POSTHOOK: query: SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### -4 NULL NULL +-1 0.0 0.0 +-11 NULL NULL -1234567890 NULL NULL +-1255 NULL NULL 0 0.2348228191855647 0.055141756410256405 1 0.06627820154470102 0.004392800000000008 -2 0.0 0.0 -3 0.0 0.0 -124 NULL NULL -200 NULL NULL -4400 NULL NULL -1234567890 NULL NULL 10 NULL NULL +100 NULL NULL +1234567890 NULL NULL +124 NULL NULL 125 NULL NULL --1255 NULL NULL --11 NULL NULL --1 0.0 0.0 +2 0.0 0.0 20 NULL NULL -100 NULL NULL +200 NULL NULL +3 0.0 0.0 +4 NULL NULL +4400 NULL NULL +NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF PREHOOK: type: QUERY @@ -3466,11 +3532,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: histogram_numeric(_col0, 3) mode: hash @@ -3550,7 +3616,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -3561,7 +3627,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(key) Group By Vectorization: @@ -3681,7 +3747,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -3692,7 +3758,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(key) Group 
By Vectorization: @@ -3812,7 +3878,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -3823,7 +3889,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(key) Group By Vectorization: @@ -3939,6 +4005,16 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DE POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@decimal_udf_txt_small +PREHOOK: query: insert into DECIMAL_UDF_txt_small values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_udf_txt_small +POSTHOOK: query: insert into DECIMAL_UDF_txt_small values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_udf_txt_small +POSTHOOK: Lineage: decimal_udf_txt_small.key EXPRESSION [] +POSTHOOK: Lineage: decimal_udf_txt_small.value EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key + key FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -4018,44 +4094,45 @@ POSTHOOK: query: SELECT key + key FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.600 +-0.660 +-0.666 +-2.240 +-2.240 +-2.244 +-2469135780.246 +-2510.980 -8800.000 -NULL 0.000 0.000 -200.000 -20.000 -2.000 -0.200 -0.020 -400.000 -40.000 -4.000 0.000 -0.400 +0.020 0.040 +0.200 +0.400 0.600 -0.660 -0.666 --0.600 --0.660 --0.666 +0.660 +0.666 +2.000 +2.000 2.000 -4.000 -6.280 --2.240 --2.240 --2.244 2.240 2.244 +20.000 +200.000 +2469135780.246 248.000 250.400 --2510.980 +4.000 +4.000 +40.000 +400.000 6.280 6.280 6.280 -2.000 --2469135780.246 -2469135780.246 +6.280 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key + value FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -4135,44 +4212,45 @@ POSTHOOK: query: SELECT key + value FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.300 +-0.330 +-0.333 +-12.122 +-2.120 +-2.120 +-2469135780.123 +-2510.490 0.000 -NULL 0.000 0.000 -200.000 -20.000 -2.000 -0.100 -0.010 -400.000 -40.000 -4.000 0.000 -0.200 +0.010 0.020 +0.100 +0.200 0.300 0.330 0.333 --0.300 --0.330 --0.333 2.000 -4.000 -6.140 --2.120 --2.120 --12.122 +2.000 +2.000 2.120 2.122 +20.000 +200.000 +2469135780.123 248.000 250.200 --2510.490 +4.000 +4.000 +40.000 +400.000 +6.140 6.140 6.140 7.140 -2.000 --2469135780.123 -2469135780.123 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key + (value/2) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -4252,44 +4330,45 @@ POSTHOOK: query: SELECT key + (value/2) FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.3 +-0.33 +-0.333 +-1.62 +-1.62 +-1.851851835123E9 +-1882.99 -2200.0 -NULL +-6.622 0.0 0.0 -150.0 -15.0 -1.5 -0.1 -0.01 -300.0 -30.0 -3.0 0.0 -0.2 +0.01 0.02 +0.1 +0.2 0.3 0.33 0.333 --0.3 
--0.33 --0.333 1.5 -3.0 -4.640000000000001 --1.62 --1.62 --6.622 +1.5 +1.5 1.62 1.622 +1.851851835123E9 +15.0 +150.0 186.0 187.7 --1882.99 +3.0 +3.0 +30.0 +300.0 +4.640000000000001 4.640000000000001 4.640000000000001 5.140000000000001 -1.5 --1.851851835123E9 -1.851851835123E9 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key + '1.0' FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -4369,44 +4448,45 @@ POSTHOOK: query: SELECT key + '1.0' FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.1200000000000001 +-0.1200000000000001 +-0.12200000000000011 +-1.234567889123E9 +-1254.49 -4399.0 -NULL +0.667 +0.6699999999999999 +0.7 1.0 1.0 -101.0 -11.0 -2.0 -1.1 -1.01 -201.0 -21.0 -3.0 1.0 -1.2 +1.01 1.02 +1.1 +1.2 +1.234567891123E9 1.3 1.33 1.333 -0.7 -0.6699999999999999 -0.667 +101.0 +11.0 +125.0 +126.2 +2.0 +2.0 2.0 -3.0 -4.140000000000001 --0.1200000000000001 --0.1200000000000001 --0.12200000000000011 2.12 2.122 -125.0 -126.2 --1254.49 +201.0 +21.0 +3.0 +3.0 4.140000000000001 4.140000000000001 4.140000000000001 -2.0 --1.234567889123E9 -1.234567891123E9 +4.140000000000001 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key - key FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -4487,7 +4567,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### 0.000 -NULL 0.000 0.000 0.000 @@ -4524,6 +4603,8 @@ NULL 0.000 0.000 0.000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key - value FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -4603,44 +4684,45 @@ POSTHOOK: query: SELECT key - value FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.120 +-0.120 +-0.123 +-0.300 +-0.330 +-0.333 +-0.490 +-0.860 -8800.000 -NULL 0.000 0.000 0.000 0.000 0.000 -0.100 -0.010 0.000 0.000 0.000 0.000 -0.200 -0.020 -0.300 -0.330 -0.333 --0.300 --0.330 --0.333 0.000 0.000 -0.140 --0.120 --0.120 -9.878 +0.000 +0.000 +0.010 +0.020 +0.100 0.120 0.122 -0.000 -0.200 --0.490 +0.123 0.140 0.140 --0.860 -0.000 --0.123 -0.123 +0.140 +0.200 +0.200 +0.300 +0.330 +0.333 +9.878 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key - (value/2) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -4720,44 +4802,45 @@ POSTHOOK: query: SELECT key - (value/2) FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.3 +-0.33 +-0.333 +-0.6200000000000001 +-0.6200000000000001 +-6.172839451229999E8 +-627.99 -6600.0 -NULL 0.0 0.0 -50.0 -5.0 -0.5 -0.1 -0.01 -100.0 -10.0 -1.0 0.0 -0.2 +0.01 0.02 +0.1 +0.2 0.3 0.33 0.333 --0.3 --0.33 --0.333 0.5 -1.0 -1.6400000000000001 --0.6200000000000001 --0.6200000000000001 -4.378 +0.5 +0.5 0.6200000000000001 0.6220000000000001 -62.0 -62.7 --627.99 +1.0 +1.0 +1.1400000000000001 1.6400000000000001 1.6400000000000001 -1.1400000000000001 -0.5 --6.172839451229999E8 +1.6400000000000001 +10.0 +100.0 +4.378 +5.0 +50.0 6.172839451229999E8 +62.0 +62.7 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key - '1.0' FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -4837,44 +4920,45 @@ POSTHOOK: query: SELECT key - '1.0' FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### --4401.0 -NULL --1.0 --1.0 -99.0 -9.0 -0.0 +-0.667 +-0.6699999999999999 +-0.7 +-0.8 -0.9 +-0.98 -0.99 
-199.0 -19.0 -1.0 -1.0 --0.8 --0.98 --0.7 --0.6699999999999999 --0.667 +-1.0 +-1.0 +-1.234567891123E9 -1.3 -1.33 -1.333 -0.0 -1.0 -2.14 +-1256.49 -2.12 -2.12 -2.122 +-4401.0 +0.0 +0.0 +0.0 0.1200000000000001 0.12200000000000011 +1.0 +1.0 +1.234567889123E9 123.0 124.2 --1256.49 +19.0 +199.0 2.14 2.14 2.14 -0.0 --1.234567891123E9 -1.234567889123E9 +2.14 +9.0 +99.0 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key * key FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -4954,44 +5038,45 @@ POSTHOOK: query: SELECT key * key FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -19360000.000000 -NULL 0.000000 0.000000 -10000.000000 -100.000000 -1.000000 -0.010000 -0.000100 -40000.000000 -400.000000 -4.000000 0.000000 -0.040000 +0.000100 0.000400 +0.010000 +0.040000 0.090000 -0.108900 -0.110889 0.090000 0.108900 +0.108900 +0.110889 0.110889 1.000000 -4.000000 -9.859600 +1.000000 +1.000000 1.254400 1.254400 -1.258884 1.254400 1.258884 +1.258884 +100.000000 +10000.000000 +1524157875322755800.955129 +1524157875322755800.955129 15376.000000 15675.040000 1576255.140100 +19360000.000000 +4.000000 +4.000000 +400.000000 +40000.000000 9.859600 9.859600 9.859600 -1.000000 -1524157875322755800.955129 -1524157875322755800.955129 +9.859600 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key, value FROM DECIMAL_UDF_txt_small where key * value > 0 PREHOOK: type: QUERY @@ -5077,29 +5162,29 @@ POSTHOOK: query: SELECT key, value FROM DECIMAL_UDF_txt_small where key * value POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -100.000 100 -10.000 10 -1.000 1 -200.000 200 -20.000 20 -2.000 2 -1.000 1 -2.000 2 -3.140 3 -1.120 -1 -1.120 -1 -1.122 -11 +-1234567890.123 -1234567890 +-1255.490 -1255 +1.000 1 +1.000 1 +1.000 1 1.120 1 1.122 1 +10.000 10 +100.000 100 +1234567890.123 1234567890 124.000 124 125.200 125 --1255.490 -1255 +2.000 2 +2.000 2 +20.000 20 +200.000 200 +3.140 3 3.140 3 3.140 3 3.140 4 -1.000 1 --1234567890.123 -1234567890 -1234567890.123 1234567890 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key * value FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -5180,17 +5265,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -19360000.000 -NULL 0.000 0.000 -10000.000 -100.000 -1.000 0.000 0.000 -40000.000 -400.000 -4.000 0.000 0.000 0.000 @@ -5201,22 +5279,30 @@ NULL 0.000 0.000 1.000 -4.000 -9.420 +1.000 +1.000 1.120 1.120 -12.342 1.120 1.122 +100.000 +10000.000 +12.342 +12.560 +1524157875170903950.470 +1524157875170903950.470 15376.000 15650.000 1575639.950 +4.000 +4.000 +400.000 +40000.000 9.420 9.420 -12.560 -1.000 -1524157875170903950.470 -1524157875170903950.470 +9.420 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key * (value/2) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -5296,44 +5382,45 @@ POSTHOOK: query: SELECT key * (value/2) FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.0 +-0.0 +-0.0 -9680000.0 -NULL 0.0 0.0 -5000.0 -50.0 -0.5 0.0 0.0 -20000.0 -200.0 -2.0 0.0 0.0 0.0 0.0 0.0 0.0 --0.0 --0.0 --0.0 0.5 -2.0 -4.71 +0.5 +0.5 0.56 0.56 -6.171 0.56 0.561 -7688.0 -7825.0 -787819.975 +2.0 +2.0 +200.0 +20000.0 +4.71 4.71 4.71 +50.0 +5000.0 +6.171 6.28 -0.5 7.620789375854519E17 7.620789375854519E17 +7688.0 +7825.0 +787819.975 +NULL +NULL PREHOOK: query: EXPLAIN 
VECTORIZATION DETAIL SELECT key * '2.0' FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -5413,49 +5500,50 @@ POSTHOOK: query: SELECT key * '2.0' FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.6 +-0.66 +-0.666 +-2.24 +-2.24 +-2.244 +-2.469135780246E9 +-2510.98 -8800.0 -NULL 0.0 0.0 -200.0 -20.0 -2.0 -0.2 -0.02 -400.0 -40.0 -4.0 0.0 -0.4 +0.02 0.04 +0.2 +0.4 0.6 0.66 0.666 --0.6 --0.66 --0.666 2.0 -4.0 -6.28 --2.24 --2.24 --2.244 +2.0 +2.0 2.24 2.244 +2.469135780246E9 +20.0 +200.0 248.0 250.4 --2510.98 +4.0 +4.0 +40.0 +400.0 6.28 6.28 6.28 -2.0 --2.469135780246E9 -2.469135780246E9 +6.28 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT key / 0 FROM DECIMAL_UDF_txt_small limit 1 +SELECT key / 0 FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT key / 0 FROM DECIMAL_UDF_txt_small limit 1 +SELECT key / 0 FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -5487,22 +5575,16 @@ STAGE PLANS: projectedOutputColumnNums: [3] selectExpressions: DecimalColDivideDecimalScalar(col 0:decimal(15,3), val 0) -> 3:decimal(18,6) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Limit Vectorization: - className: VectorLimitOperator - native: true + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -5524,19 +5606,57 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator - limit: 1 + limit: -1 Processor Tree: ListSink -PREHOOK: query: SELECT key / 0 FROM DECIMAL_UDF_txt_small limit 1 +PREHOOK: query: SELECT key / 0 FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY PREHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -POSTHOOK: query: SELECT key / 0 FROM DECIMAL_UDF_txt_small limit 1 +POSTHOOK: query: SELECT key / 0 FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key / key FROM DECIMAL_UDF_txt_small WHERE key is not null and key <> 0 PREHOOK: type: QUERY @@ -5744,6 +5864,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -1.00000000000000 +0.10200000000000 +0.78500000000000 +1.00000000000000 1.00000000000000 1.00000000000000 1.00000000000000 @@ -5752,21 +5875,18 @@ POSTHOOK: Input: 
default@decimal_udf_txt_small 1.00000000000000 1.00000000000000 1.00000000000000 +1.00000000000000 +1.00000000009963 +1.00000000009963 +1.00039043824701 +1.00160000000000 +1.04666666666667 +1.04666666666667 1.04666666666667 1.12000000000000 1.12000000000000 -0.10200000000000 1.12000000000000 1.12200000000000 -1.00000000000000 -1.00160000000000 -1.00039043824701 -1.04666666666667 -1.04666666666667 -0.78500000000000 -1.00000000000000 -1.00000000009963 -1.00000000009963 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key / (value/2) FROM DECIMAL_UDF_txt_small WHERE value is not null and value <> 0 PREHOOK: type: QUERY @@ -5854,6 +5974,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -2.0 +0.20400000000000001 +1.57 +2.0 2.0 2.0 2.0 @@ -5862,21 +5985,18 @@ POSTHOOK: Input: default@decimal_udf_txt_small 2.0 2.0 2.0 +2.0 +2.0000000001992597 +2.0000000001992597 +2.000780876494024 +2.0032 +2.0933333333333333 +2.0933333333333333 2.0933333333333333 2.24 2.24 -0.20400000000000001 2.24 2.244 -2.0 -2.0032 -2.000780876494024 -2.0933333333333333 -2.0933333333333333 -1.57 -2.0 -2.0000000001992597 -2.0000000001992597 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT 1 + (key / '2.0') FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -5957,43 +6077,44 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -2199.0 -NULL +-6.172839440615E8 +-626.745 +0.43899999999999995 +0.43999999999999995 +0.43999999999999995 +0.8335 +0.835 +0.85 1.0 1.0 -51.0 -6.0 -1.5 -1.05 -1.005 -101.0 -11.0 -2.0 1.0 -1.1 +1.005 1.01 +1.05 +1.1 1.15 1.165 1.1665 -0.85 -0.835 -0.8335 1.5 -2.0 -2.5700000000000003 -0.43999999999999995 -0.43999999999999995 -0.43899999999999995 +1.5 +1.5 1.56 1.561 -63.0 -63.6 --626.745 +101.0 +11.0 +2.0 +2.0 2.5700000000000003 2.5700000000000003 2.5700000000000003 -1.5 --6.172839440615E8 +2.5700000000000003 +51.0 +6.0 6.172839460615E8 +63.0 +63.6 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT abs(key) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -6073,44 +6194,45 @@ POSTHOOK: query: SELECT abs(key) FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -4400.000 -NULL 0.000 0.000 -100.000 -10.000 -1.000 -0.100 -0.010 -200.000 -20.000 -2.000 0.000 -0.200 +0.010 0.020 +0.100 +0.200 0.300 -0.330 -0.333 0.300 0.330 +0.330 +0.333 0.333 1.000 -2.000 -3.140 +1.000 +1.000 1.120 1.120 -1.122 1.120 1.122 +1.122 +10.000 +100.000 +1234567890.123 +1234567890.123 124.000 125.200 1255.490 +2.000 +2.000 +20.000 +200.000 3.140 3.140 3.140 -1.000 -1234567890.123 -1234567890.123 +3.140 +4400.000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF_txt_small GROUP BY value ORDER BY value PREHOOK: type: QUERY @@ -6293,23 +6415,24 @@ POSTHOOK: query: SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DE POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-1 -1.1200000000000000 -1.1200000 -2.240 +-11 -1.1220000000000000 -1.1220000 -1.122 -1234567890 -1234567890.1230000000000000 -1234567890.1230000 -1234567890.123 -1255 -1255.4900000000000000 -1255.4900000 -1255.490 --11 -1.1220000000000000 -1.1220000 -1.122 --1 -1.1200000000000000 -1.1200000 -2.240 0 0.0253846153846154 0.0253846 0.330 1 1.0484000000000000 1.0484000 5.242 -2 2.0000000000000000 2.0000000 4.000 -3 
3.1400000000000000 3.1400000 9.420 -4 3.1400000000000000 3.1400000 3.140 10 10.0000000000000000 10.0000000 10.000 -20 20.0000000000000000 20.0000000 20.000 100 100.0000000000000000 100.0000000 100.000 +1234567890 1234567890.1230000000000000 1234567890.1230000 1234567890.123 124 124.0000000000000000 124.0000000 124.000 125 125.2000000000000000 125.2000000 125.200 +2 2.0000000000000000 2.0000000 4.000 +20 20.0000000000000000 20.0000000 20.000 200 200.0000000000000000 200.0000000 200.000 +3 3.1400000000000000 3.1400000 9.420 +4 3.1400000000000000 3.1400000 3.140 4400 -4400.0000000000000000 -4400.0000000 -4400.000 -1234567890 1234567890.1230000000000000 1234567890.1230000 1234567890.123 +NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT -key FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -6389,44 +6512,45 @@ POSTHOOK: query: SELECT -key FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -4400.000 -NULL -0.000 -0.000 --100.000 --10.000 --1.000 --0.100 -0.010 --200.000 --20.000 --2.000 -0.000 --0.200 -0.020 +-0.100 +-0.200 -0.300 -0.330 -0.333 -0.300 -0.330 -0.333 -1.000 --2.000 --3.140 -1.120 -1.120 -1.122 +-1.000 +-1.000 -1.120 -1.122 +-10.000 +-100.000 +-1234567890.123 -124.000 -125.200 -1255.490 +-2.000 +-2.000 +-20.000 +-200.000 -3.140 -3.140 -3.140 --1.000 +-3.140 +0.000 +0.000 +0.000 +0.300 +0.330 +0.333 +1.120 +1.120 +1.122 1234567890.123 --1234567890.123 +1255.490 +4400.000 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT +key FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -6460,44 +6584,45 @@ POSTHOOK: query: SELECT +key FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.300 +-0.330 +-0.333 +-1.120 +-1.120 +-1.122 +-1234567890.123 +-1255.490 -4400.000 -NULL 0.000 0.000 -100.000 -10.000 -1.000 -0.100 -0.010 -200.000 -20.000 -2.000 0.000 -0.200 +0.010 0.020 +0.100 +0.200 0.300 0.330 0.333 --0.300 --0.330 --0.333 1.000 -2.000 -3.140 --1.120 --1.120 --1.122 +1.000 +1.000 1.120 1.122 +10.000 +100.000 +1234567890.123 124.000 125.200 --1255.490 +2.000 +2.000 +20.000 +200.000 3.140 3.140 3.140 -1.000 --1234567890.123 -1234567890.123 +3.140 +NULL +NULL PREHOOK: query: EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY POSTHOOK: query: EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF_txt_small @@ -6544,44 +6669,45 @@ POSTHOOK: query: SELECT CEIL(key) FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-1 +-1 +-1 +-1234567890 +-1255 -4400 -NULL 0 0 -100 -10 +0 +0 +0 +0 1 1 1 -200 -20 -2 -0 1 1 1 1 1 -0 -0 -0 1 +1 +10 +100 +1234567891 +124 +126 2 -4 --1 --1 --1 2 2 -124 -126 --1255 +2 +20 +200 4 4 4 -1 --1234567890 -1234567891 +4 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT FLOOR(key) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -6661,44 +6787,45 @@ POSTHOOK: query: SELECT FLOOR(key) FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-1 +-1 +-1 +-1234567891 +-1256 +-2 +-2 +-2 -4400 -NULL 0 0 -100 -10 -1 0 0 -200 -20 -2 0 0 0 0 0 0 --1 --1 --1 1 -2 -3 --2 --2 --2 1 1 +1 +1 +10 +100 +1234567890 124 125 --1256 +2 +2 +20 +200 3 3 3 -1 --1234567891 -1234567890 +3 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ROUND(key, 2) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ 
-6778,44 +6905,45 @@ POSTHOOK: query: SELECT ROUND(key, 2) FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.30 +-0.33 +-0.33 +-1.12 +-1.12 +-1.12 +-1234567890.12 +-1255.49 -4400.00 -NULL 0.00 0.00 -100.00 -10.00 -1.00 -0.10 -0.01 -200.00 -20.00 -2.00 0.00 -0.20 +0.01 0.02 +0.10 +0.20 0.30 0.33 0.33 --0.30 --0.33 --0.33 1.00 -2.00 -3.14 --1.12 --1.12 --1.12 +1.00 +1.00 1.12 1.12 +10.00 +100.00 +1234567890.12 124.00 125.20 --1255.49 +2.00 +2.00 +20.00 +200.00 3.14 3.14 3.14 -1.00 --1234567890.12 -1234567890.12 +3.14 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT POWER(key, 2) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -6895,44 +7023,45 @@ POSTHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -1.936E7 -NULL 0.0 0.0 -10000.0 -100.0 -1.0 -0.010000000000000002 -1.0E-4 -40000.0 -400.0 -4.0 0.0 +0.010000000000000002 0.04000000000000001 -4.0E-4 0.09 -0.10890000000000001 -0.11088900000000002 0.09 0.10890000000000001 +0.10890000000000001 +0.11088900000000002 0.11088900000000002 1.0 -4.0 -9.8596 +1.0 +1.0 +1.0E-4 1.2544000000000002 1.2544000000000002 -1.2588840000000003 1.2544000000000002 1.2588840000000003 +1.2588840000000003 +1.52415787532275558E18 +1.52415787532275558E18 +1.936E7 +100.0 +10000.0 15376.0 15675.04 1576255.1401 +4.0 +4.0 +4.0E-4 +400.0 +40000.0 9.8596 9.8596 9.8596 -1.0 -1.52415787532275558E18 -1.52415787532275558E18 +9.8596 +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY @@ -7012,44 +7141,45 @@ POSTHOOK: query: SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### +-0.120000 +-0.120000 +-0.122000 -2199.000000 -NULL -NULL -NULL -1.000000 -1.000000 +-617283944.061500 +-626.745000 0.000000 0.000000 0.000000 -1.000000 -1.000000 0.000000 -NULL 0.000000 0.000000 -0.100000 -0.010000 -0.001000 -0.100000 -0.010000 -0.001000 0.000000 0.000000 -1.000000 --0.120000 --0.120000 --0.122000 -0.440000 +0.000000 +0.001000 +0.001000 +0.010000 +0.010000 +0.100000 +0.100000 0.439000 +0.440000 +1.000000 +1.000000 1.000000 1.000000 --626.745000 1.000000 1.000000 1.000000 -0.000000 --617283944.061500 1.000000 +1.000000 +1.000000 +1.000000 +NULL +NULL +NULL +NULL +NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF_txt_small GROUP BY value PREHOOK: type: QUERY @@ -7187,23 +7317,24 @@ POSTHOOK: query: SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF_txt_s POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -4 0.0 0.0 +-1 0.0 0.0 +-11 0.0 0.0 -1234567890 0.0 0.0 +-1255 0.0 0.0 0 0.22561046704494161 0.050900082840236685 1 0.05928102563215321 0.0035142400000000066 -2 0.0 0.0 -3 0.0 0.0 -124 0.0 0.0 -200 0.0 0.0 -4400 0.0 0.0 -1234567890 0.0 0.0 10 0.0 0.0 +100 0.0 0.0 +1234567890 0.0 0.0 +124 0.0 0.0 125 0.0 0.0 --1255 0.0 0.0 --11 0.0 0.0 --1 0.0 0.0 +2 0.0 0.0 20 0.0 0.0 -100 0.0 0.0 +200 0.0 0.0 +3 0.0 0.0 +4 0.0 0.0 +4400 0.0 0.0 +NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF_txt_small GROUP BY value PREHOOK: type: QUERY @@ -7341,23 +7472,24 @@ POSTHOOK: query: SELECT value, stddev_samp(key), 
var_samp(key) FROM DECIMAL_UDF_ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf_txt_small #### A masked pattern was here #### -4 NULL NULL +-1 0.0 0.0 +-11 NULL NULL -1234567890 NULL NULL +-1255 NULL NULL 0 0.2348228191855647 0.055141756410256405 1 0.06627820154470102 0.004392800000000008 -2 0.0 0.0 -3 0.0 0.0 -124 NULL NULL -200 NULL NULL -4400 NULL NULL -1234567890 NULL NULL 10 NULL NULL +100 NULL NULL +1234567890 NULL NULL +124 NULL NULL 125 NULL NULL --1255 NULL NULL --11 NULL NULL --1 0.0 0.0 +2 0.0 0.0 20 NULL NULL -100 NULL NULL +200 NULL NULL +3 0.0 0.0 +4 NULL NULL +4400 NULL NULL +NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF_txt_small PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out index 90f68f5..75782f8 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out @@ -48,6 +48,16 @@ POSTHOOK: Input: default@decimal_udf2_txt POSTHOOK: Output: default@decimal_udf2 POSTHOOK: Lineage: decimal_udf2.key SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:key, type:decimal(14,5), comment:null), ] POSTHOOK: Lineage: decimal_udf2.value SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:value, type:int, comment:null), ] +PREHOOK: query: insert into DECIMAL_UDF2 values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_udf2 +POSTHOOK: query: insert into DECIMAL_UDF2 values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_udf2 +POSTHOOK: Lineage: decimal_udf2.key EXPRESSION [] +POSTHOOK: Lineage: decimal_udf2.value EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 @@ -73,7 +83,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf2 - Statistics: Num rows: 38 Data size: 4032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4032 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(14,5), 1:value:int, 2:ROW__ID:struct] @@ -83,7 +93,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColEqualDecimalScalar(col 0:decimal(14,5), val 10) predicate: (key = 10) (type: boolean) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: null (type: double), null (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -92,13 +102,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9] selectExpressions: ConstantVectorExpression(val null) -> 3:double, ConstantVectorExpression(val null) -> 4:double, ConstantVectorExpression(val 1.4711276743037347) -> 5:double, ConstantVectorExpression(val -0.8390715290764524) -> 6:double, ConstantVectorExpression(val -0.5440211108893698) -> 7:double, ConstantVectorExpression(val 0.6483608274590866) -> 8:double, 
ConstantVectorExpression(val 0.17453292519943295) -> 9:double - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -138,6 +148,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### NULL NULL 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf2 +#### A masked pattern was here #### +-3806952922 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), @@ -169,7 +192,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf2 - Statistics: Num rows: 38 Data size: 4184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(14,5), 1:value:int, 2:ROW__ID:struct] @@ -179,7 +202,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColEqualDecimalScalar(col 0:decimal(14,5), val 10) predicate: (key = 10) (type: boolean) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -188,13 +211,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10] selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 2.302585092994046) -> 5:double, ConstantVectorExpression(val 1.0) -> 6:double, FuncLogWithBaseLongToDouble(col 1:double) -> 7:double, VectorUDFAdaptor(log(value, 10)) -> 8:double, ConstantVectorExpression(val 1.0) -> 9:double, ConstantVectorExpression(val 3.1622776601683795) -> 10:double - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -240,6 +263,25 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### 22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf2 +#### A masked pattern was here #### +1514360349 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2_txt WHERE key = 10 @@ -331,6 +373,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2_txt #### A masked pattern was here #### NULL NULL 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2_txt) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf2_txt +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2_txt) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf2_txt +#### A masked pattern was here #### +-3806952922 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), @@ -434,6 +489,25 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2_txt #### A masked pattern was here #### 22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2_txt) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf2_txt +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2_txt) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf2_txt +#### A masked pattern was here #### +1514360349 PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF2_txt PREHOOK: type: DROPTABLE PREHOOK: Input: default@decimal_udf2_txt diff --git ql/src/test/results/clientpositive/llap/vector_empty_where.q.out ql/src/test/results/clientpositive/llap/vector_empty_where.q.out new file mode 100644 index 0000000..494c5c9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_empty_where.q.out @@ -0,0 +1,652 @@ +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cstring1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cstring1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 
+ +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastStringToBoolean(col 6) -> 13:boolean) + predicate: cstring1 (type: string) + Statistics: Num rows: 6144 Data size: 449620 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 6144 Data size: 449620 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cstring1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cstring1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +6041 +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cint +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cint +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastLongToBooleanViaLongToLong(col 2:int) -> 13:boolean) + predicate: cint (type: int) + Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + 
sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +6082 +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cfloat +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where cfloat +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 13:boolean) + predicate: cfloat (type: float) + Statistics: Num rows: 6144 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 6144 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + 
Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cfloat +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cfloat +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3022 +PREHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where ctimestamp1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count (distinct cint) from alltypesorc where ctimestamp1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator 
Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 528216 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastTimestampToBoolean(col 8:timestamp) -> 13:boolean) + predicate: ctimestamp1 (type: timestamp) + Statistics: Num rows: 6144 Data size: 264108 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 6144 Data size: 264108 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3022 diff --git ql/src/test/results/clientpositive/llap/vector_gather_stats.q.out ql/src/test/results/clientpositive/llap/vector_gather_stats.q.out new file mode 100644 index 0000000..e777242 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_gather_stats.q.out @@ -0,0 +1,108 @@ +PREHOOK: query: create table cd +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cd +POSTHOOK: query: create table cd +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +partitioned by +( + cd_education_status string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cd +PREHOOK: query: alter table cd add partition (cd_education_status='Primary') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@cd +POSTHOOK: query: alter table cd add partition (cd_education_status='Primary') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@cd +POSTHOOK: Output: default@cd@cd_education_status=Primary +PREHOOK: query: insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 0, 0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: query: insert into table cd partition (cd_education_status='Primary') values (1, 'M', 'M', 500, 'Good', 0, 0, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: 
Output: default@cd@cd_education_status=Primary +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_credit_rating SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_demo_sk SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_college_count SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_count SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_dep_employed_count SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_gender SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_marital_status SCRIPT [] +POSTHOOK: Lineage: cd PARTITION(cd_education_status=Primary).cd_purchase_estimate SCRIPT [] +PREHOOK: query: explain vectorization detail +analyze table cd partition (cd_education_status) compute statistics +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +analyze table cd partition (cd_education_status) compute statistics +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: cd + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: TABLESCAN operator: gather stats not supported + vectorized: false + + Stage: Stage-2 + Stats Work + Basic Stats Work: + +PREHOOK: query: analyze table cd partition (cd_education_status) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@cd +PREHOOK: Input: default@cd@cd_education_status=Primary +PREHOOK: Output: default@cd +PREHOOK: Output: default@cd@cd_education_status=Primary +POSTHOOK: query: analyze table cd partition (cd_education_status) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cd +POSTHOOK: Input: default@cd@cd_education_status=Primary +POSTHOOK: Output: default@cd +POSTHOOK: Output: default@cd@cd_education_status=Primary diff --git ql/src/test/results/clientpositive/llap/vector_if_expr_2.q.out ql/src/test/results/clientpositive/llap/vector_if_expr_2.q.out new file mode 100644 index 0000000..f4baa69 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_if_expr_2.q.out @@ -0,0 +1,136 @@ +PREHOOK: query: drop table if exists foo +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists foo +POSTHOOK: type: DROPTABLE +PREHOOK: query: create temporary table foo (x int, y int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create temporary table foo (x int, y int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@foo +POSTHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.x SCRIPT [] +POSTHOOK: Lineage: foo.y SCRIPT [] +PREHOOK: query: EXPLAIN VECTORIZATION 
EXPRESSION +select x, IF(x > 0,y,0) from foo order by x +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +select x, IF(x > 0,y,0) from foo order by x +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: foo + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Select Operator + expressions: x (type: int), if((x > 0), y, 0) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + selectExpressions: IfExprLongColumnLongScalar(col 3:boolean, col 1:int, val 0)(children: LongColGreaterLongScalar(col 0:int, val 0) -> 3:boolean) -> 4:int + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select x, IF(x > 0,y,0) from foo order by x +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +1 1 +2 NULL +3 1 
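The result block above captures the behavior under test, and the identical query is rerun below: for the row (2, NULL), IF(x > 0, y, 0) takes the THEN branch and returns the NULL from column y, while the ELSE scalar 0 can never contribute a NULL. A minimal, self-contained Java sketch of this propagation pattern follows. It is not the generated Hive class; the IfExprLongColumnLongScalar name comes from the plan above, but every identifier in the sketch (evaluate, thenIsNull, and so on) is an illustrative assumption.

// Models the null propagation of a column-vs-scalar IF expression such as
// IfExprLongColumnLongScalar(cond, col, scalar): where the condition holds,
// both the value and the null flag come from the THEN column; where it does
// not, the scalar branch is taken and is never null. A real Hive
// ColumnVector also carries noNulls and isRepeating optimizations, which
// this sketch deliberately omits.
public class IfExprNullSketch {

  static void evaluate(int n, boolean[] cond,
      long[] thenVals, boolean[] thenIsNull,  // nullable THEN column (y)
      long elseScalar,                        // ELSE literal (0), never null
      long[] outVals, boolean[] outIsNull) {
    for (int i = 0; i < n; i++) {
      if (cond[i]) {
        outVals[i] = thenVals[i];
        outIsNull[i] = thenIsNull[i];  // a NULL in y stays NULL in the output
      } else {
        outVals[i] = elseScalar;
        outIsNull[i] = false;          // a scalar branch cannot produce NULL
      }
    }
  }

  public static void main(String[] args) {
    // The three test rows (x, y) = (1, 1), (2, NULL), (3, 1); x > 0 for all.
    boolean[] cond = {true, true, true};
    long[] y = {1, 0, 1};
    boolean[] yIsNull = {false, true, false};
    long[] out = new long[3];
    boolean[] outIsNull = new boolean[3];
    evaluate(3, cond, y, yIsNull, 0L, out, outIsNull);
    for (int i = 0; i < 3; i++) {
      System.out.println(outIsNull[i] ? "NULL" : Long.toString(out[i]));
    }
    // Prints 1, NULL, 1, matching the second column of the results above.
  }
}

Run standalone, the sketch reproduces the NULL for the (2, NULL) row, which is exactly the value the vectorized and row-mode paths must agree on in the rerun below.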
+PREHOOK: query: select x, IF(x > 0,y,0) from foo order by x +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +1 1 +2 NULL +3 1 diff --git ql/src/test/results/clientpositive/llap/vector_interval_1.q.out ql/src/test/results/clientpositive/llap/vector_interval_1.q.out index 1be7232..7d891db 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_1.q.out @@ -24,6 +24,7 @@ POSTHOOK: Lineage: vector_interval_1.dt SIMPLE [] POSTHOOK: Lineage: vector_interval_1.str1 SIMPLE [] POSTHOOK: Lineage: vector_interval_1.str2 SIMPLE [] POSTHOOK: Lineage: vector_interval_1.ts SIMPLE [] +_c0 _c1 _c2 _c3 PREHOOK: query: insert into vector_interval_1 select null, null, null, null from src limit 1 PREHOOK: type: QUERY @@ -38,6 +39,18 @@ POSTHOOK: Lineage: vector_interval_1.dt EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] +_col0 _col1 _col2 _col3 +PREHOOK: query: select * from vector_interval_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_interval_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +vector_interval_1.ts vector_interval_1.dt vector_interval_1.str1 vector_interval_1.str2 +2001-01-01 01:02:03 2001-01-01 1-2 1 2:3:4 +NULL NULL NULL NULL PREHOOK: query: explain vectorization expression select str1, @@ -52,6 +65,7 @@ select interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1 POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -155,6 +169,7 @@ from vector_interval_1 order by str1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +str1 _c1 _c2 _c3 _c4 NULL 1-2 NULL 1 02:03:04.000000000 NULL 1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000 PREHOOK: query: explain vectorization expression @@ -179,6 +194,7 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -290,6 +306,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 NULL 2-4 NULL NULL 0-0 NULL NULL 2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0 PREHOOK: query: explain vectorization expression @@ -314,6 +331,7 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -425,6 +443,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL 2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: explain 
vectorization expression @@ -461,6 +480,7 @@ select dt - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -583,6 +603,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56 PREHOOK: query: explain vectorization expression @@ -619,6 +640,7 @@ select ts - interval_day_time(str2) from vector_interval_1 order by ts POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -741,6 +763,7 @@ from vector_interval_1 order by ts POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +ts _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59 PREHOOK: query: explain vectorization expression @@ -759,6 +782,7 @@ select ts - timestamp '2001-01-01 01:02:03' from vector_interval_1 order by ts POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -863,6 +887,7 @@ from vector_interval_1 order by ts POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +ts _c1 _c2 _c3 NULL NULL NULL NULL 2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: explain vectorization expression @@ -881,6 +906,7 @@ select dt - date '2001-01-01' from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -985,6 +1011,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 NULL NULL NULL NULL 2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: explain vectorization expression @@ -1009,6 +1036,7 @@ select date '2001-01-01' - ts from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -1119,5 +1147,6 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 NULL NULL NULL NULL NULL NULL NULL 2001-01-01 0 01:02:03.000000000 0 01:02:03.000000000 0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 diff --git ql/src/test/results/clientpositive/llap/vector_join.q.out ql/src/test/results/clientpositive/llap/vector_join.q.out new file mode 100644 index 0000000..94c0290 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_join.q.out @@ -0,0 +1,104 @@ +PREHOOK: query: DROP TABLE IF EXISTS test1_vc 
+PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test1_vc +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS test2_vc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test2_vc +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test1_vc + ( + id string) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1_vc +POSTHOOK: query: CREATE TABLE test1_vc + ( + id string) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1_vc +PREHOOK: query: CREATE TABLE test2_vc( + id string + ) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' + ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2_vc +POSTHOOK: query: CREATE TABLE test2_vc( + id string + ) + PARTITIONED BY ( + cr_year bigint, + cr_month bigint) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' +TBLPROPERTIES ( + 'serialization.null.format'='' + ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2_vc +PREHOOK: query: SELECT cr.id1 , +cr.id2 +FROM +(SELECT t1.id id1, + t2.id id2 + from + (select * from test1_vc ) t1 + left outer join test2_vc t2 + on t1.id=t2.id) cr +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_vc +PREHOOK: Input: default@test2_vc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cr.id1 , +cr.id2 +FROM +(SELECT t1.id id1, + t2.id id2 + from + (select * from test1_vc ) t1 + left outer join test2_vc t2 + on t1.id=t2.id) cr +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_vc +POSTHOOK: Input: default@test2_vc +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out new file mode 100644 index 0000000..5e168a9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: drop table if exists char_part_tbl1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_part_tbl2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab +POSTHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab +PREHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@studenttab +POSTHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@studenttab +POSTHOOK: Lineage: studenttab.age SCRIPT [] +POSTHOOK: Lineage: studenttab.gpa SCRIPT [] +POSTHOOK: Lineage: studenttab.name SCRIPT [] +PREHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl1 +POSTHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl1 +PREHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl2 +POSTHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl2 +PREHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: query: insert into table char_tbl2 
partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: show partitions char_tbl1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl1 +POSTHOOK: query: show partitions char_tbl1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl1 +gpa=2.5 +gpa=3.5 +PREHOOK: query: show partitions char_tbl2 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl2 +POSTHOOK: query: show partitions char_tbl2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl2 +gpa=3 +gpa=3.5 +PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 llap + File Output Operator [FS_10] + Merge Join Operator [MERGEJOIN_21] (rows=2 width=429) + Conds:RS_23._col2=RS_28._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_23] + PartitionCols:_col2 + Select Operator [SEL_22] (rows=2 width=237) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=2 width=237) + default@char_tbl1,c1,Tbl:COMPLETE,Col:COMPLETE,Output:["name","age"] + Dynamic Partitioning Event Operator [EVENT_26] (rows=1 width=237) + Group By Operator [GBY_25] (rows=1 width=237) + Output:["_col0"],keys:_col0 + Select Operator [SEL_24] (rows=2 width=237) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_22] + <-Map 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_28] + PartitionCols:_col2 + Select Operator [SEL_27] (rows=2 width=192) + Output:["_col0","_col1","_col2"] + TableScan [TS_3] (rows=2 width=192) + default@char_tbl2,c2,Tbl:COMPLETE,Col:COMPLETE,Output:["name","age"] + +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5 +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5 diff --git ql/src/test/results/clientpositive/llap/vector_like_2.q.out ql/src/test/results/clientpositive/llap/vector_like_2.q.out new file mode 100644 index 0000000..5c62e31 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_like_2.q.out @@ -0,0 +1,151 @@ +PREHOOK: query: drop table if exists foo +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists foo +POSTHOOK: type: DROPTABLE +PREHOOK: query: create temporary table foo (a string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo 
+POSTHOOK: query: create temporary table foo (a string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: insert into foo values("some foo"),("some bar"),(null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@foo +POSTHOOK: query: insert into foo values("some foo"),("some bar"),(null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.a SCRIPT [] +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select a, a like "%bar" from foo order by a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select a, a like "%bar" from foo order by a +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: foo + Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:string, 1:ROW__ID:struct] + Select Operator + expressions: a (type: string), (a like '%bar') (type: boolean) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2] + selectExpressions: SelectStringColLikeStringScalar(col 0:string) -> 2:boolean + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: a:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) + outputColumnNames: _col0, _col1 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, a like "%bar" from foo order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select a, a like "%bar" from foo order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +NULL NULL +some bar true +some foo false +PREHOOK: query: select a, a like "%bar" from foo order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select a, a like "%bar" from foo order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +NULL NULL +some bar true +some foo false diff --git ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out new file mode 100644 index 0000000..d02fa08 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out @@ -0,0 +1,51 @@ +PREHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cint) IN (UDFToInteger(ctinyint), UDFToInteger(cbigint)) (type: boolean) + Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: FILTER operator: Vectorizing IN expression only supported for constant values + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out new file mode 100644 index 0000000..ee0e664 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out @@ -0,0 +1,274 @@ +PREHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@non_string_part +POSTHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@non_string_part +PREHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc +WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@non_string_part +POSTHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc +WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +PREHOOK: query: SHOW PARTITIONS non_string_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@non_string_part +POSTHOOK: query: SHOW PARTITIONS non_string_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@non_string_part +ctinyint=__HIVE_DEFAULT_PARTITION__ +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
alias: non_string_part + Statistics: Num rows: 3073 Data size: 24584 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 0) + predicate: (cint > 0) (type: boolean) + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int), ctinyint (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: tinyint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@non_string_part +PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_string_part +POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked 
pattern was here #### +762 NULL +762 NULL +6981 NULL +6981 NULL +6981 NULL +86028 NULL +504142 NULL +799471 NULL +1248059 NULL +1286921 NULL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: non_string_part + Statistics: Num rows: 3073 Data size: 313446 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 0) + predicate: (cint > 0) (type: boolean) + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int), cstring1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 
1020 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@non_string_part +PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_string_part +POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +762 3WsVeqb28VWEEOLI8ail +762 40ks5556SV +6981 1FNNhmiFLGw425NA13g +6981 o5mb0QP5Y48Qd4vdB0 +6981 sF2CRfgt2K +86028 T2o8XRFAL0HC4ikDQnfoCymw +504142 PlOxor04p5cvVl +799471 2fu24 +1248059 Uhps6mMh3IfHB3j7yH62K +1286921 ODLrXI8882q8LS8 diff --git ql/src/test/results/clientpositive/llap/vector_orc_string_reader_empty_dict.q.out ql/src/test/results/clientpositive/llap/vector_orc_string_reader_empty_dict.q.out new file mode 100644 index 0000000..4f00bed --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_orc_string_reader_empty_dict.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcstr +POSTHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcstr +PREHOOK: query: insert overwrite table orcstr select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: insert overwrite table orcstr select "" from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select "" from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### + +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select 
vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### + diff --git ql/src/test/results/clientpositive/llap/vector_order_null.q.out ql/src/test/results/clientpositive/llap/vector_order_null.q.out new file mode 100644 index 0000000..9ff8f87 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_order_null.q.out @@ -0,0 +1,1427 @@ +PREHOOK: query: create table src_null (a int, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_null +POSTHOOK: query: create table src_null (a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_null +PREHOOK: query: insert into src_null values (1, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (1, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b SCRIPT [] +col1 col2 +PREHOOK: query: insert into src_null values (null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [] +POSTHOOK: Lineage: src_null.b EXPRESSION [] +_col0 _col1 +PREHOOK: query: insert into src_null values (3, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (3, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b EXPRESSION [] +_col0 _col1 +PREHOOK: query: insert into src_null values (2, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b EXPRESSION [] +_col0 _col1 +PREHOOK: query: insert into src_null values (2, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b SCRIPT [] +col1 col2 +PREHOOK: query: insert into src_null values (2, 'B') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'B') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a SCRIPT [] +POSTHOOK: Lineage: src_null.b SCRIPT [] +col1 col2 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc, b asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc, b asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc, b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* 
FROM src_null x ORDER BY a asc, b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +NULL NULL +1 A +2 NULL +2 A +2 B +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc, b asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc, b asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: -+ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: -+ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc, b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc, b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +3 NULL +2 NULL +2 A +2 B +1 A +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator 
Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 NULL +3 NULL +NULL NULL +1 A +2 A +2 B +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc, a asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc, a asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: -+ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: 
[] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: -+ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 B +1 A +2 A +NULL NULL +2 NULL +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +NULL NULL +1 A +2 NULL +2 A +2 B +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: -+ + Reduce Sink 
Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: -+ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +NULL NULL +3 NULL +2 NULL +2 A +2 B +1 A +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column 
stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +1 A +2 A +2 B +NULL NULL +2 NULL +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls 
last, a +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: -+ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: za + reduceColumnSortOrder: -+ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +PREHOOK: 
type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 B +1 A +2 A +NULL NULL +2 NULL +3 NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: +- + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: zz + reduceColumnSortOrder: +- + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +1 A +2 B +2 A +2 NULL +3 NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: -- + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: zz + reduceColumnSortOrder: -- + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 B +2 A +1 A +3 NULL +2 NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +x.a x.b +2 NULL +3 NULL +NULL NULL +1 A +2 A +2 B
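A note for reading the vector_order_null.q.out plans above: the vectorized reduce sink encodes the ORDER BY specification as two parallel strings, one character per key column. reduceColumnSortOrder uses '+' for ascending and '-' for descending, while reduceColumnNullOrder uses 'a' for NULLS FIRST and 'z' for NULLS LAST. When no explicit null order is given, ascending keys default to 'a' and descending keys to 'z', which is why ORDER BY a desc, b asc shows za / -+ while ORDER BY b asc nulls first, a asc nulls last shows az / ++. Under those defaults, the two queries below are equivalent (a sketch restating what the plans above already show, not a query taken from the test file):

  SELECT x.* FROM src_null x ORDER BY a DESC, b ASC;
  SELECT x.* FROM src_null x ORDER BY a DESC NULLS LAST, b ASC NULLS FIRST;

Both forms yield the za / -+ reduce-sink ordering shown earlier, so the NULL row sorts after a = 3, 2, 2, 2, 1.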
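The new golden file that follows, vector_outer_reference_windowed.q.out, exercises window functions whose arguments are themselves aggregates (including outer-referenced columns, per the test name). In its plans the inner aggregate always runs first as an ordinary GROUP BY aggregation (a HASH-mode Group By in Map 1, merged in the next reducer), and only then does the final reducer's VectorPTFOperator evaluate the window function over one row per group. For example, the grouped query select sum(sum(c1)) over (partition by c2 order by c1) ... group by c1, c2 behaves like this two-step rewrite (an illustration of the semantics, not a query from the test; the derived-table alias t is introduced here):

  SELECT SUM(s) OVER (PARTITION BY c2 ORDER BY c1)
  FROM (SELECT c1, c2, SUM(c1) AS s
        FROM e011_01
        GROUP BY c1, c2) t;

In the degenerate case with no GROUP BY, select sum(sum(c1)) over() from e011_01, the inner sum collapses the table to a single row, so the window sum is just the grand total (16.00 in the run below).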
diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out new file mode 100644 index 0000000..9064e2b --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -0,0 +1,2850 @@ +PREHOOK: query: DROP TABLE IF EXISTS e011_01 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS e011_01 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS e011_02 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS e011_02 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS e011_03 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS e011_03 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE e011_01 ( + c1 decimal(15,2), + c2 decimal(15,2)) + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_01 +POSTHOOK: query: CREATE TABLE e011_01 ( + c1 decimal(15,2), + c2 decimal(15,2)) + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_01 +PREHOOK: query: CREATE TABLE e011_02 ( + c1 decimal(15,2), + c2 decimal(15,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_02 +POSTHOOK: query: CREATE TABLE e011_02 ( + c1 decimal(15,2), + c2 decimal(15,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_02 +PREHOOK: query: CREATE TABLE e011_03 ( + c1 decimal(15,2), + c2 decimal(15,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_03 +POSTHOOK: query: CREATE TABLE e011_03 ( + c1 decimal(15,2), + c2 decimal(15,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_03 +PREHOOK: query: CREATE TABLE e011_01_small ( + c1 decimal(7,2), + c2 decimal(7,2)) + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_01_small +POSTHOOK: query: CREATE TABLE e011_01_small ( + c1 decimal(7,2), + c2 decimal(7,2)) + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_01_small +PREHOOK: query: CREATE TABLE e011_02_small ( + c1 decimal(7,2), + c2 decimal(7,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_02_small +POSTHOOK: query: CREATE TABLE e011_02_small ( + c1 decimal(7,2), + c2 decimal(7,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_02_small +PREHOOK: query: CREATE TABLE e011_03_small ( + c1 decimal(7,2), + c2 decimal(7,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_03_small +POSTHOOK: query: CREATE TABLE e011_03_small ( + c1 decimal(7,2), + c2 decimal(7,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_03_small +PREHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@e011_01 +POSTHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@e011_01 +PREHOOK: query: INSERT INTO TABLE e011_02 + SELECT c1, c2 + FROM e011_01 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_02 +POSTHOOK: query: INSERT INTO TABLE e011_02 + SELECT c1, c2 + FROM e011_01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_02 +POSTHOOK: Lineage: e011_02.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ] +POSTHOOK: Lineage: e011_02.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ] +c1 c2 +PREHOOK: query: INSERT INTO TABLE e011_03 + SELECT c1, c2 + FROM e011_01 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_03 +POSTHOOK: query: INSERT INTO TABLE e011_03 + SELECT c1, c2 + FROM e011_01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_03 +POSTHOOK: Lineage: e011_03.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ] +POSTHOOK: Lineage: e011_03.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ] +c1 c2 +PREHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01_small +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@e011_01_small +POSTHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01_small +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@e011_01_small +PREHOOK: query: INSERT INTO TABLE e011_02_small +
SELECT c1, c2 + FROM e011_01_small +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Output: default@e011_02_small +POSTHOOK: query: INSERT INTO TABLE e011_02_small + SELECT c1, c2 + FROM e011_01_small +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Output: default@e011_02_small +POSTHOOK: Lineage: e011_02_small.c1 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c1, type:decimal(7,2), comment:null), ] +POSTHOOK: Lineage: e011_02_small.c2 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c2, type:decimal(7,2), comment:null), ] +c1 c2 +PREHOOK: query: INSERT INTO TABLE e011_03_small + SELECT c1, c2 + FROM e011_01_small +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Output: default@e011_03_small +POSTHOOK: query: INSERT INTO TABLE e011_03_small + SELECT c1, c2 + FROM e011_01_small +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Output: default@e011_03_small +POSTHOOK: Lineage: e011_03_small.c1 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c1, type:decimal(7,2), comment:null), ] +POSTHOOK: Lineage: e011_03_small.c2 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c2, type:decimal(7,2), comment:null), ] +c1 c2 +PREHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_01 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_01 +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_02 +PREHOOK: Output: default@e011_02 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_02 +POSTHOOK: Output: default@e011_02 +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_03 +PREHOOK: Output: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_03 +POSTHOOK: Output: default@e011_03 +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Output: default@e011_01_small +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Output: default@e011_01_small +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_02_small +PREHOOK: Output: default@e011_02_small +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_02_small +POSTHOOK: Output: default@e011_02_small +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_03_small +PREHOOK: Output: default@e011_03_small +#### A masked pattern was here #### 
+POSTHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_03_small +POSTHOOK: Output: default@e011_03_small +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Select Operator + expressions: c1 (type: decimal(15,2)) + outputColumnNames: c1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(15,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(25,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By 
Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(25,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + keyExpressions: ConstantVectorExpression(val 0) -> 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(35,2), bigint] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col0 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 1:decimal(25,2)] + functionNames: [sum] + keyInputColumns: [] + native: true + nonKeyInputColumns: [1] + orderExpressions: [ConstantVectorExpression(val 0) -> 3:int] + outputColumns: [2, 1] + outputTypes: [decimal(35,2), decimal(25,2)] + streamingColumns: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
+ Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over() from e011_01 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over() from e011_01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +#### A masked pattern was here #### +_c0 +16.00 +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: c1, c2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(15,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), 
c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(15,2), KEY._col1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(25,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(15,2), KEY.reducesinkkey1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(35,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: 
[col 2:decimal(25,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(15,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(35,2), decimal(15,2), decimal(15,2), decimal(25,2)] + partitionExpressions: [col 0:decimal(15,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: 
decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: e011_03 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(15,2)) + 1 _col0 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(15,2), KEY._col1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(25,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(15,2), KEY.reducesinkkey1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(35,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input 
alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(25,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(15,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(35,2), decimal(15,2), decimal(15,2), decimal(25,2)] + partitionExpressions: [col 0:decimal(15,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Input: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Input: default@e011_03 +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_03 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + 
TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(15,2)) + 1 _col0 (type: decimal(15,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(15,2), KEY._col1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(25,2)) -> decimal(25,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(15,2), KEY.reducesinkkey1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(35,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(25,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(15,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(35,2), decimal(15,2), decimal(15,2), decimal(25,2)] + partitionExpressions: [col 0:decimal(15,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Input: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Input: default@e011_03 +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain 
vectorization detail +select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: e011_03 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(15,2)) + 1 _col0 (type: decimal(15,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 1792 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: corr(_col0, _col2) + keys: _col1 (type: decimal(15,2)), _col3 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF corr not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: corr(VALUE._col0) + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window 
function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Input: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Input: default@e011_03 +#### A masked pattern was here #### +sum_window_0 +NULL +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01_small +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01_small +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Select Operator + expressions: c1 (type: decimal(7,2)) + outputColumnNames: c1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(7,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] 
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + keyExpressions: ConstantVectorExpression(val 0) -> 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(17,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(27,2), bigint] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + 
arguments: _col0 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 1:decimal(17,2)] + functionNames: [sum] + keyInputColumns: [] + native: true + nonKeyInputColumns: [1] + orderExpressions: [ConstantVectorExpression(val 0) -> 3:int] + outputColumns: [2, 1] + outputTypes: [decimal(27,2), decimal(17,2)] + streamingColumns: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(27,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over() from e011_01_small +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over() from e011_01_small +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +#### A masked pattern was here #### +_c0 +16.00 +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01_small + group by e011_01_small.c1, e011_01_small.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01_small + group by e011_01_small.c1, e011_01_small.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: c1, c2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(7,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(27,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(17,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(7,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(27,2), decimal(7,2), decimal(7,2), decimal(17,2)] + partitionExpressions: [col 0:decimal(7,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(27,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01_small + group by e011_01_small.c1, e011_01_small.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01_small + group by e011_01_small.c1, e011_01_small.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01_small.c1)) over( + partition by e011_01_small.c2 order by e011_01_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_01_small.c1, e011_01_small.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01_small.c1)) over( + partition by e011_01_small.c2 order by e011_01_small.c1) + from e011_01_small + join e011_03_small on 
e011_01_small.c1 = e011_03_small.c1 + group by e011_01_small.c1, e011_01_small.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: e011_03_small + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(7,2)) + 1 _col0 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + 
native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(27,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(17,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(7,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(27,2), decimal(7,2), decimal(7,2), decimal(17,2)] + partitionExpressions: [col 0:decimal(7,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(27,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01_small.c1)) over( + partition by e011_01_small.c2 order by e011_01_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_01_small.c1, e011_01_small.c2 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@e011_01_small +PREHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01_small.c1)) over( + partition by e011_01_small.c2 order by e011_01_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_01_small.c1, e011_01_small.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01_small.c1)) over( + partition by e011_03_small.c2 order by e011_03_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c1, e011_03_small.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01_small.c1)) over( + partition by e011_03_small.c2 order by e011_03_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c1, e011_03_small.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_03_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(7,2)) + 1 _col0 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: 
KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(17,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(27,2)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2:decimal(17,2)] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:decimal(7,2)] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(27,2), decimal(7,2), decimal(7,2), decimal(17,2)] + partitionExpressions: [col 0:decimal(7,2)] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: 
decimal(27,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01_small.c1)) over( + partition by e011_03_small.c2 order by e011_03_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c1, e011_03_small.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01_small.c1)) over( + partition by e011_03_small.c2 order by e011_03_small.c1) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c1, e011_03_small.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(corr(e011_01_small.c1, e011_03_small.c1)) + over(partition by e011_01_small.c2 order by e011_03_small.c2) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c2, e011_01_small.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(corr(e011_01_small.c1, e011_03_small.c1)) + over(partition by e011_01_small.c2 order by e011_03_small.c2) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c2, e011_01_small.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + 
Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: e011_03_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(7,2), c2:decimal(7,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(7,2)) + 1 _col0 (type: decimal(7,2)) + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 1792 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: corr(_col0, _col2) + keys: _col1 (type: decimal(7,2)), _col3 (type: decimal(7,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(7,2)) + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF corr not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: corr(VALUE._col0) + keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)), _col2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(corr(e011_01_small.c1, e011_03_small.c1)) + over(partition by e011_01_small.c2 order by e011_03_small.c2) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c2, e011_01_small.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01_small +PREHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +POSTHOOK: query: select sum(corr(e011_01_small.c1, e011_03_small.c1)) + over(partition by e011_01_small.c2 order by e011_03_small.c2) + from e011_01_small + join e011_03_small on e011_01_small.c1 = e011_03_small.c1 + group by e011_03_small.c2, e011_01_small.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01_small +POSTHOOK: Input: default@e011_03_small +#### A masked pattern was here #### +sum_window_0 +NULL 
+NULL +NULL +NULL diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index 5eaed53..16b59e6 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -6105,6 +6105,152 @@ Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 4 4 Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 5 5 Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 1464.48 6 6 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col1 (type: double) + 
outputColumnNames: _col0, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 40 Data size: 14936 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 0:string] + functionNames: [rank] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1] + orderExpressions: [col 0:string] + outputColumns: [2, 0, 1] + outputTypes: [int, string, double] + streamingColumns: [2] + Statistics: Num rows: 40 Data size: 14936 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_retailprice, rank() over(partition by p_mfgr) as r from vector_ptf_part_simple_orc @@ -6158,112 +6304,525 @@ Manufacturer#3 1922.98 1 Manufacturer#3 1190.27 1 Manufacturer#3 NULL 1 Manufacturer#3 99.68 1 -PREHOOK: query: select p_mfgr, p_retailprice, +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, rank() over(partition by p_mfgr order by p_name) as r from vector_ptf_part_simple_orc PREHOOK: type: QUERY -PREHOOK: Input: default@vector_ptf_part_simple_orc -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, rank() over(partition by p_mfgr order by p_name) as r from vector_ptf_part_simple_orc POSTHOOK: type: QUERY -POSTHOOK: Input: default@vector_ptf_part_simple_orc -#### A masked pattern was here #### -p_mfgr p_retailprice r -Manufacturer#1 1173.15 1 -Manufacturer#1 1173.15 1 -Manufacturer#1 1753.76 3 -Manufacturer#1 1753.76 3 -Manufacturer#1 1753.76 3 -Manufacturer#1 1753.76 3 -Manufacturer#1 1602.59 7 -Manufacturer#1 1414.42 8 -Manufacturer#1 1632.66 9 -Manufacturer#1 NULL 9 -Manufacturer#1 1632.66 9 -Manufacturer#1 1632.66 9 -Manufacturer#2 1690.68 1 -Manufacturer#2 1800.7 2 -Manufacturer#2 1800.7 2 -Manufacturer#2 1800.7 2 -Manufacturer#2 2031.98 5 -Manufacturer#2 900.66 6 -Manufacturer#2 1698.66 6 -Manufacturer#2 1000.6 8 -Manufacturer#3 99.68 1 -Manufacturer#3 590.27 2 -Manufacturer#3 NULL 2 -Manufacturer#3 1190.27 2 
-Manufacturer#3 1190.27 2 -Manufacturer#3 55.39 6 -Manufacturer#3 1922.98 7 -Manufacturer#3 1337.29 8 -Manufacturer#4 NULL 1 -Manufacturer#4 1375.42 2 -Manufacturer#4 NULL 3 -Manufacturer#4 1206.26 3 -Manufacturer#4 1844.92 5 -Manufacturer#4 1290.35 6 -Manufacturer#5 1789.69 1 -Manufacturer#5 1611.66 2 -Manufacturer#5 1611.66 2 -Manufacturer#5 1788.73 4 -Manufacturer#5 1018.1 5 -Manufacturer#5 1464.48 6 -PREHOOK: query: select p_mfgr, p_name, p_retailprice, -rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r -from vector_ptf_part_simple_orc -PREHOOK: type: QUERY -PREHOOK: Input: default@vector_ptf_part_simple_orc +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_retailprice, -rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r -from vector_ptf_part_simple_orc -POSTHOOK: type: QUERY -POSTHOOK: Input: default@vector_ptf_part_simple_orc + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### -p_mfgr p_name p_retailprice r -Manufacturer#1 almond antique burnished rose metallic 1173.15 1 -Manufacturer#1 almond antique burnished rose metallic 1173.15 1 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 -Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 -Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 -Manufacturer#1 almond aquamarine pink moccasin thistle NULL 9 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 -Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 -Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 -Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 -Manufacturer#2 almond aquamarine rose maroon antique 1698.66 6 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 -Manufacturer#3 almond antique chartreuse khaki white 99.68 1 -Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 -Manufacturer#3 almond antique forest lavender goldenrod NULL 2 -Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 -Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 -Manufacturer#3 almond antique metallic orange dim 55.39 6 -Manufacturer#3 almond antique misty red olive 1922.98 7 -Manufacturer#3 almond antique olive coral navajo 1337.29 8 -Manufacturer#4 almond antique gainsboro frosted violet NULL 1 -Manufacturer#4 almond antique violet mint lemon 1375.42 2 -Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 -Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 3 -Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 -Manufacturer#4 almond azure 
aquamarine papaya violet 1290.35 6 + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [2] + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1:string] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string] + outputColumns: [3, 0, 1, 2] + outputTypes: [int, string, string, double] + partitionExpressions: [col 0:string] + streamingColumns: [3] + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE 
Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3] + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#1 1173.15 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1602.59 7 +Manufacturer#1 1414.42 8 +Manufacturer#1 1632.66 9 +Manufacturer#1 NULL 9 +Manufacturer#1 1632.66 9 +Manufacturer#1 1632.66 9 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 2031.98 5 +Manufacturer#2 900.66 6 +Manufacturer#2 1698.66 6 +Manufacturer#2 1000.6 8 +Manufacturer#3 99.68 1 +Manufacturer#3 590.27 2 +Manufacturer#3 NULL 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 55.39 6 +Manufacturer#3 1922.98 7 +Manufacturer#3 1337.29 8 +Manufacturer#4 NULL 1 +Manufacturer#4 1375.42 2 +Manufacturer#4 NULL 3 +Manufacturer#4 1206.26 3 +Manufacturer#4 1844.92 5 +Manufacturer#4 1290.35 6 +Manufacturer#5 1789.69 1 +Manufacturer#5 1611.66 2 +Manufacturer#5 1611.66 2 +Manufacturer#5 1788.73 4 +Manufacturer#5 1018.1 5 +Manufacturer#5 1464.48 6 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + 
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] + Reduce Output Operator + key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 6, 1] + keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0, 9] + valueColumnNums: [2] + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp, timestamp, bigint, timestamp, timestamp] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:timestamp, KEY.reducesinkkey2:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, timestamp, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3] + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true 
+ PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 2:string] + functionNames: [rank] + keyInputColumns: [0, 2] + native: true + nonKeyInputColumns: [3] + orderExpressions: [col 2:string] + outputColumns: [4, 0, 2, 3] + outputTypes: [int, string, string, double] + partitionExpressions: [col 0:string, IfExprColumnNull(col 5:boolean, col 6:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 5:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 6:timestamp) -> 7:timestamp] + streamingColumns: [4] + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3, 4] + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 6 
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 +Manufacturer#3 almond antique forest lavender goldenrod NULL 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique metallic orange dim 55.39 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 3 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 Manufacturer#5 almond antique blue firebrick mint 1789.69 1 Manufacturer#5 almond antique medium spring khaki 1611.66 2 Manufacturer#5 almond antique medium spring khaki 1611.66 2 Manufacturer#5 almond antique sky peru orange 1788.73 4 Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] + Reduce Output Operator + key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 6] + keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 2] + Statistics: Num 
rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp, timestamp] + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END ASC NULLS FIRST + partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_name, p_retailprice, rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r from vector_ptf_part_simple_orc diff --git ql/src/test/results/clientpositive/vector_string_decimal.q.out ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out similarity index 56% rename from ql/src/test/results/clientpositive/vector_string_decimal.q.out rename to ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out index 59b0588..d792c46 100644 --- 
ql/src/test/results/clientpositive/vector_string_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out @@ -56,48 +56,52 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: orc_decimal - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterDoubleColumnInList(col 2:double, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 0:decimal(18,0)) -> 2:double) - predicate: (UDFToDouble(id)) IN (1.0E8, 2.0E8) (type: boolean) - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: id (type: decimal(18,0)) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_decimal + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDoubleColumnInList(col 2:double, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 0:decimal(18,0)) -> 2:double) + predicate: (UDFToDouble(id)) IN (1.0E8, 2.0E8) (type: boolean) + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: decimal(18,0)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git 
ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out new file mode 100644 index 0000000..4429e9a --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out @@ -0,0 +1,400 @@ +PREHOOK: query: explain vectorization detail +select 'key1', 'value1' from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select 'key1', 'value1' from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Row Limit Per Split: 1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: 'key1' (type: string), 'value1' (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 14] + selectExpressions: ConstantVectorExpression(val key1) -> 13:string, ConstantVectorExpression(val value1) -> 14:string + Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [string, string] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +_c0 _c1 +key1 value1 +PREHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@decimal_2 +POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: explain vectorization detail +insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Row Limit Per Split: 1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: 17.29 (type: decimal(18,9)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: ConstantVectorExpression(val 17.29) -> 13:decimal(18,9) + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.decimal_2 + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(18,9)] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.decimal_2 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@decimal_2 +POSTHOOK: 
query: insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@decimal_2 +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +_col0 +PREHOOK: query: select count(*) from decimal_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from decimal_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +_c0 +1 +PREHOOK: query: drop table decimal_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_2 +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: drop table decimal_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_2 +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: explain vectorization detail +select count(1) from (select * from (Select 1 a) x order by x.a) y +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select count(1) from (select * from (Select 1 a) x order by x.a) y +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false +#### A masked pattern was here #### + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +_c0 +1 +PREHOOK: query: explain vectorization detail +create temporary table dual as select 1 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain vectorization detail +create temporary table dual as select 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dual + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false +#### A masked pattern was here #### + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: _c0 int + input format: org.apache.hadoop.mapred.TextInputFormat +#### A masked pattern was here #### + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dual + isTemporary: true + + Stage: Stage-3 + Stats Work + Basic Stats Work: + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create temporary table dual as select 1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@dual +POSTHOOK: query: create temporary table dual as select 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dual +_c0 +PREHOOK: query: select * from dual +PREHOOK: type: QUERY +PREHOOK: Input: default@dual +#### A masked pattern was here #### +POSTHOOK: query: select * from dual +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dual +#### A masked pattern was here #### +dual._c0 +1 diff --git ql/src/test/results/clientpositive/llap/vector_udf2.q.out ql/src/test/results/clientpositive/llap/vector_udf2.q.out new file mode 100644 index 0000000..8e3ccc9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_udf2.q.out @@ -0,0 +1,330 @@ +PREHOOK: query: drop table varchar_udf_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_udf_2 +POSTHOOK: type: DROPTABLE +PREHOOK: 
query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_udf_2 +POSTHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_udf_2 +PREHOOK: query: insert overwrite table varchar_udf_2 + select key, value, key, value from src where key = '238' limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar_udf_2 +POSTHOOK: query: insert overwrite table varchar_udf_2 + select key, value, key, value from src where key = '238' limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar_udf_2 +POSTHOOK: Lineage: varchar_udf_2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_2.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain vectorization expression +select + c1 LIKE '%38%', + c2 LIKE 'val_%', + c3 LIKE '%38', + c1 LIKE '%3x8%', + c2 LIKE 'xval_%', + c3 LIKE '%x38' +from varchar_udf_2 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + c1 LIKE '%38%', + c2 LIKE 'val_%', + c3 LIKE '%38', + c1 LIKE '%3x8%', + c2 LIKE 'xval_%', + c3 LIKE '%x38' +from varchar_udf_2 limit 1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: varchar_udf_2 + Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Select Operator + expressions: (c1 like '%38%') (type: boolean), (c2 like 'val_%') (type: boolean), (c3 like '%38') (type: boolean), (c1 like '%3x8%') (type: boolean), (c2 like 'xval_%') (type: boolean), (c3 like '%x38') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10] + selectExpressions: SelectStringColLikeStringScalar(col 0:string) -> 5:boolean, SelectStringColLikeStringScalar(col 1:string) -> 6:boolean, SelectStringColLikeStringScalar(col 2:varchar(10)) -> 7:boolean, SelectStringColLikeStringScalar(col 0:string) -> 8:boolean, SelectStringColLikeStringScalar(col 1:string) -> 9:boolean, SelectStringColLikeStringScalar(col 2:varchar(10)) -> 10:boolean + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select + c1 LIKE '%38%', + c2 LIKE 'val_%', + c3 LIKE '%38', + c1 LIKE '%3x8%', + c2 LIKE 'xval_%', + c3 LIKE '%x38' +from varchar_udf_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_2 +#### A masked pattern was here #### +POSTHOOK: query: select + c1 LIKE '%38%', + c2 LIKE 'val_%', + c3 LIKE '%38', + c1 LIKE '%3x8%', + c2 LIKE 'xval_%', + c3 LIKE '%x38' +from varchar_udf_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_2 +#### A masked pattern was here #### +true true true false false false +PREHOOK: query: drop table varchar_udf_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_udf_2 +PREHOOK: Output: default@varchar_udf_2 +POSTHOOK: query: drop table varchar_udf_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_udf_2 +POSTHOOK: Output: default@varchar_udf_2 +PREHOOK: query: create temporary table HIVE_14349 (a string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@HIVE_14349 +POSTHOOK: query: create temporary table HIVE_14349 (a string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@HIVE_14349 +PREHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive_14349 +POSTHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hive_14349 +POSTHOOK: Lineage: hive_14349.a SCRIPT [] +PREHOOK: query: explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a%' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a%' +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hive_14349 + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringColLikeStringScalar(col 0:string, pattern XYZ%a%) + predicate: (a like 'XYZ%a%') (type: boolean) + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: 
false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%' +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_14349 +#### A masked pattern was here #### +POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_14349 +#### A masked pattern was here #### +XYZa +PREHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive_14349 +POSTHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hive_14349 +POSTHOOK: Lineage: hive_14349.a SCRIPT [] +PREHOOK: query: explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a_' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from HIVE_14349 where a LIKE 'XYZ%a_' +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hive_14349 + Statistics: Num rows: 6 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringColLikeStringScalar(col 0:string, pattern XYZ%a_) + predicate: (a like 'XYZ%a_') (type: boolean) + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_' +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_14349 +#### A masked pattern was here #### +POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_14349 +#### A masked pattern was here #### +XYZab +PREHOOK: query: drop table HIVE_14349 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hive_14349 +PREHOOK: Output: default@hive_14349 +POSTHOOK: query: drop table HIVE_14349 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hive_14349 +POSTHOOK: Output: default@hive_14349 diff --git ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out similarity index 72% rename from ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out rename to ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out index 1761b5a..647fcb7 100644 --- ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out @@ -124,33 +124,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), UDFToBoolean(s) (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 12 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s (type: string), UDFToBoolean(s) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1116 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorization_div0.q.out ql/src/test/results/clientpositive/llap/vectorization_div0.q.out similarity index 54% rename from ql/src/test/results/clientpositive/vectorization_div0.q.out rename to ql/src/test/results/clientpositive/llap/vectorization_div0.q.out index 64c05c7..2c55516 100644 --- ql/src/test/results/clientpositive/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_div0.q.out @@ -14,48 +14,52 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Select Operator - expressions: (UDFToDouble(cint) / 0.0) (type: double), (UDFToDouble(ctinyint) / 0.0) (type: double), (UDFToDouble(cbigint) / 0.0) (type: double), (cdouble / 0.0) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [14, 15, 16, 13] - selectExpressions: DoubleColDivideDoubleScalar(col 13:double, val 0.0)(children: CastLongToDouble(col 2:int) -> 13:double) -> 14:double, DoubleColDivideDoubleScalar(col 13:double, val 0.0)(children: CastLongToDouble(col 0:tinyint) -> 13:double) -> 15:double, DoubleColDivideDoubleScalar(col 13:double, val 0.0)(children: CastLongToDouble(col 3:bigint) -> 13:double) -> 16:double, DoubleColDivideDoubleScalar(col 5:double, val 0.0) -> 13:double - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 220184 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Select Operator + expressions: (UDFToDouble(cint) / 0.0) (type: double), (UDFToDouble(ctinyint) / 0.0) (type: double), (UDFToDouble(cbigint) / 0.0) (type: double), (cdouble / 0.0) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [14, 15, 16, 13] + selectExpressions: DoubleColDivideDoubleScalar(col 13:double, val 0.0)(children: CastLongToDouble(col 2:int) -> 13:double) -> 14:double, DoubleColDivideDoubleScalar(col 13:double, val 0.0)(children: CastLongToDouble(col 0:tinyint) -> 13:double) -> 15:double, DoubleColDivideDoubleScalar(col 13:double, val 0.0)(children: CastLongToDouble(col 3:bigint) -> 13:double) -> 16:double, DoubleColDivideDoubleScalar(col 5:double, val 0.0) -> 13:double + Statistics: Num rows: 12288 Data size: 393216 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -189,69 +193,89 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColLessLongScalar(col 3:bigint, val 100000000)) - predicate: ((cbigint < 100000000) and (cbigint > 0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColLessLongScalar(col 3:bigint, val 100000000)) + predicate: ((cbigint < 100000000) and (cbigint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 16, 18] + selectExpressions: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 13:bigint, DoubleColDivideDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 
14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 15:double) -> 16:double, DecimalScalarDivideDecimalColumn(val 1.2, col 17:decimal(19,0))(children: CastLongToDecimal(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 17:decimal(19,0)) -> 18:decimal(22,21) + Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(22,21)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: Select Operator - expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) + expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 16, 18] - selectExpressions: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 13:bigint, DoubleColDivideDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 15:double) -> 16:double, DecimalScalarDivideDecimalColumn(val 1.2, col 17:decimal(19,0))(children: CastLongToDecimal(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 17:decimal(19,0)) -> 18:decimal(22,21) - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: double) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: decimal(22,21)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -269,9 +293,9 @@ from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --985319 NULL -0.000001217879691754650 --985319 2.0297994862577501E-4 -0.000001217879691754650 -63925 0.11256941728588189 -0.000018771998435666797 +-985319 2.0297994862577501E-4 -0.000001217879691754650 +-985319 NULL -0.000001217879691754650 0 NULL NULL 0 NULL NULL 0 NULL NULL @@ -281,21 +305,6 @@ POSTHOOK: Input: default@alltypesorc 0 NULL NULL 0 NULL NULL 0 NULL NULL -392309 NULL 0.000003058813333367320 -673083 -0.010691103474608629 0.000001782841046349410 -2331159 NULL 0.000000514765402102559 -2342037 NULL 0.000000512374484263058 -3533105 -5.660743170667161E-5 0.000000339644590240030 -3768727 0.004139594085748318 0.000000318409903397089 -4728619 NULL 0.000000253773881972728 -5391403 NULL 0.000000222576572369010 -7022666 -0.0010246820794268159 0.000000170875277280736 -7470430 NULL 0.000000160633323650714 -8276429 NULL 0.000000144990067576246 -8286860 -8.683626850218298E-4 0.000000144807562816314 -8299981 -8.669899364829872E-4 0.000000144578644216174 -9247593 NULL 0.000000129763496295739 -9821695 -7.326637611939691E-4 0.000000122178503812224 10000738 0.001559984873116364 0.000000119991144653525 10081828 0.0015474376273826532 0.000000119026033770860 10745355 -6.696847149303117E-4 0.000000111676161466978 @@ -315,6 +324,8 @@ POSTHOOK: Input: default@alltypesorc 20165679 7.736411950224934E-4 0.000000059507046601307 20547875 NULL 0.000000058400199534015 23264783 NULL 
0.000000051580107151655 +2331159 NULL 0.000000514765402102559 +2342037 NULL 0.000000512374484263058 23475527 6.645644206411213E-4 0.000000051117063314489 24379905 NULL 0.000000049220864478348 24514624 -2.935390728407664E-4 0.000000048950373458716 @@ -330,12 +341,15 @@ POSTHOOK: Input: default@alltypesorc 33126539 NULL 0.000000036224732079617 34603086 NULL 0.000000034678987879867 35156265 NULL 0.000000034133318769784 +3533105 -5.660743170667161E-5 0.000000339644590240030 35862260 NULL 0.000000033461360215447 36123797 -1.992038655294182E-4 0.000000033219099310075 36341671 -1.980096072082101E-4 0.000000033019945615599 36413215 -5.4925114412446145E-6 0.000000032955068647468 36578596 4.2650625518814335E-4 0.000000032806070522772 36796441 -1.955623914823719E-4 0.000000032611849607955 +3768727 0.004139594085748318 0.000000318409903397089 +392309 NULL 0.000003058813333367320 39723587 NULL 0.000000030208752296211 39985709 -1.7996429674411925E-4 0.000000030010722080731 40018606 NULL 0.000000029986051987918 @@ -348,6 +362,7 @@ POSTHOOK: Input: default@alltypesorc 45717793 3.4124569399052136E-4 0.000000026247986205283 46163162 NULL 0.000000025994753132379 46525838 3.353190543284787E-4 0.000000025792120068853 +4728619 NULL 0.000000253773881972728 48626663 NULL 0.000000024677819244969 49102701 -1.465499830650864E-4 0.000000024438574163161 50300445 -1.4306036457530346E-4 0.000000023856647789100 @@ -356,6 +371,7 @@ POSTHOOK: Input: default@alltypesorc 52667422 2.9621727070673783E-4 0.000000022784483356713 52962061 2.945693522010029E-4 0.000000022657728520044 53695172 NULL 0.000000022348377988248 +5391403 NULL 0.000000222576572369010 54760317 NULL 0.000000021913678841560 55020655 2.835480602693661E-4 0.000000021809991175132 56102034 NULL 0.000000021389598815615 @@ -369,6 +385,14 @@ POSTHOOK: Input: default@alltypesorc 59347745 NULL 0.000000020219807846111 60229567 NULL 0.000000019923769334088 60330397 NULL 0.000000019890470801974 +673083 -0.010691103474608629 0.000001782841046349410 +7022666 -0.0010246820794268159 0.000000170875277280736 +7470430 NULL 0.000000160633323650714 +8276429 NULL 0.000000144990067576246 +8286860 -8.683626850218298E-4 0.000000144807562816314 +8299981 -8.669899364829872E-4 0.000000144578644216174 +9247593 NULL 0.000000129763496295739 +9821695 -7.326637611939691E-4 0.000000122178503812224 PREHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 @@ -387,69 +411,89 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -500.0), FilterDoubleColLessDoubleScalar(col 5:double, val -199.0)) - predicate: ((cdouble < -199.0) and (cdouble >= -500.0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + 
Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -500.0), FilterDoubleColLessDoubleScalar(col 5:double, val -199.0)) + predicate: ((cdouble < -199.0) and (cdouble >= -500.0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 16, 17, 15, 18] + selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 16:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 17:double, DoubleScalarDivideDoubleColumn(val 3.0, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 15:double, DoubleScalarDivideDoubleColumn(val 1.2, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 18:double + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: Select Operator - expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col4, _col5 + expressions: KEY.reducesinkkey0 (type: 
double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 16, 17, 15, 18] - selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 16:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 17:double, DoubleScalarDivideDoubleColumn(val 3.0, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 15:double, DoubleScalarDivideDoubleColumn(val 1.2, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 18:double - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + projectedOutputColumnNums: [0, 1, 2, 1, 3, 4] + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE 
Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -467,68 +511,68 @@ from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --292.0 NULL 1.0 NULL -0.010273972602739725 -0.00410958904109589 --290.0 NULL 1.0 NULL -0.010344827586206896 -0.004137931034482759 --289.0 NULL 1.0 NULL -0.010380622837370242 -0.004152249134948096 --281.0 NULL 1.0 NULL -0.010676156583629894 -0.004270462633451957 --279.0 NULL 1.0 NULL -0.010752688172043012 -0.004301075268817204 --274.0 6888911.518248175 1.0 6888911.518248175 -0.010948905109489052 -0.00437956204379562 --273.0 6028764.868131869 1.0 6028764.868131869 -0.01098901098901099 -0.004395604395604396 --257.0 6404096.53307393 1.0 6404096.53307393 -0.011673151750972763 -0.004669260700389105 --250.0 6583411.236 1.0 6583411.236 -0.012 -0.0048 --247.0 NULL 1.0 NULL -0.012145748987854251 -0.004858299595141701 --247.0 -7546669.174089069 1.0 -7546669.174089069 -0.012145748987854251 -0.004858299595141701 --246.0 NULL 1.0 NULL -0.012195121951219513 -0.004878048780487805 --237.0 NULL 1.0 NULL -0.012658227848101266 -0.005063291139240506 --236.0 NULL 1.0 NULL -0.012711864406779662 -0.005084745762711864 --229.0 7187130.170305677 1.0 7187130.170305677 -0.013100436681222707 -0.005240174672489083 --228.0 8278779.631578947 1.0 8278779.631578947 -0.013157894736842105 -0.005263157894736842 --225.0 NULL 1.0 NULL -0.013333333333333334 -0.005333333333333333 --210.0 -8876320.40952381 1.0 -8876320.40952381 -0.014285714285714285 -0.005714285714285714 --201.0 NULL 1.0 NULL -0.014925373134328358 -0.005970149253731343 --199.0 NULL 1.0 NULL -0.01507537688442211 -0.006030150753768844 --189.0 NULL 1.0 NULL -0.015873015873015872 -0.006349206349206349 --188.0 NULL 1.0 NULL -0.015957446808510637 -0.006382978723404255 --184.0 8944852.222826088 1.0 8944852.222826088 -0.016304347826086956 -0.006521739130434782 --183.0 8993731.196721312 1.0 8993731.196721312 -0.01639344262295082 -0.006557377049180328 --181.0 NULL 1.0 NULL -0.016574585635359115 -0.0066298342541436465 --179.0 NULL 1.0 NULL -0.01675977653631285 -0.0067039106145251395 --169.0 9738774.01775148 1.0 9738774.01775148 -0.01775147928994083 -0.007100591715976331 --164.0 NULL 1.0 NULL -0.018292682926829267 -0.007317073170731707 --161.0 NULL 1.0 NULL -0.018633540372670808 -0.007453416149068323 --154.0 1.2256894519480519E7 1.0 1.2256894519480519E7 -0.01948051948051948 -0.007792207792207792 --152.0 NULL 1.0 NULL -0.019736842105263157 -0.007894736842105263 --148.0 NULL 1.0 NULL -0.02027027027027027 -0.008108108108108109 --140.0 NULL 1.0 NULL -0.02142857142857143 -0.008571428571428572 --138.0 NULL 1.0 NULL -0.021739130434782608 -0.008695652173913044 --137.0 NULL 1.0 NULL -0.021897810218978103 -0.00875912408759124 --132.0 NULL 1.0 NULL -0.022727272727272728 -0.00909090909090909 --129.0 1.2758548906976745E7 1.0 1.2758548906976745E7 -0.023255813953488372 -0.009302325581395349 --128.0 NULL 1.0 NULL -0.0234375 -0.009375 --126.0 NULL 1.0 NULL -0.023809523809523808 -0.009523809523809523 --126.0 -1.4793867349206349E7 1.0 
-1.4793867349206349E7 -0.023809523809523808 -0.009523809523809523 --116.0 NULL 1.0 NULL -0.02586206896551724 -0.010344827586206896 --113.0 NULL 1.0 NULL -0.02654867256637168 -0.010619469026548672 -113.0 -1.6495816690265486E7 1.0 -1.6495816690265486E7 -0.02654867256637168 -0.010619469026548672 --96.0 NULL 1.0 NULL -0.03125 -0.012499999999999999 --94.0 -1.9830077510638297E7 1.0 -1.9830077510638297E7 -0.031914893617021274 -0.01276595744680851 --93.0 NULL 1.0 NULL -0.03225806451612903 -0.012903225806451613 --77.0 2.4513789038961038E7 1.0 2.4513789038961038E7 -0.03896103896103896 -0.015584415584415584 --69.0 2.735596747826087E7 1.0 2.735596747826087E7 -0.043478260869565216 -0.017391304347826087 --62.0 NULL 1.0 NULL -0.04838709677419355 -0.01935483870967742 --62.0 3.0444544451612905E7 1.0 3.0444544451612905E7 -0.04838709677419355 -0.01935483870967742 --60.0 NULL 1.0 NULL -0.05 -0.02 --57.0 -3.27022330877193E7 1.0 -3.27022330877193E7 -0.05263157894736842 -0.021052631578947368 --49.0 3.35888328367347E7 1.0 3.35888328367347E7 -0.061224489795918366 -0.024489795918367346 --46.0 3.577940889130435E7 1.0 3.577940889130435E7 -0.06521739130434782 -0.02608695652173913 --38.0 4.3311916026315786E7 1.0 4.3311916026315786E7 -0.07894736842105263 -0.031578947368421054 +-113.0 NULL 1.0 NULL -0.02654867256637168 -0.010619469026548672 +-116.0 NULL 1.0 NULL -0.02586206896551724 -0.010344827586206896 +-12.0 -1.5533560716666666E8 1.0 -1.5533560716666666E8 -0.25 -0.09999999999999999 +-126.0 -1.4793867349206349E7 1.0 -1.4793867349206349E7 -0.023809523809523808 -0.009523809523809523 +-126.0 NULL 1.0 NULL -0.023809523809523808 -0.009523809523809523 +-128.0 NULL 1.0 NULL -0.0234375 -0.009375 +-129.0 1.2758548906976745E7 1.0 1.2758548906976745E7 -0.023255813953488372 -0.009302325581395349 +-132.0 NULL 1.0 NULL -0.022727272727272728 -0.00909090909090909 +-137.0 NULL 1.0 NULL -0.021897810218978103 -0.00875912408759124 +-138.0 NULL 1.0 NULL -0.021739130434782608 -0.008695652173913044 +-140.0 NULL 1.0 NULL -0.02142857142857143 -0.008571428571428572 +-148.0 NULL 1.0 NULL -0.02027027027027027 -0.008108108108108109 +-152.0 NULL 1.0 NULL -0.019736842105263157 -0.007894736842105263 +-154.0 1.2256894519480519E7 1.0 1.2256894519480519E7 -0.01948051948051948 -0.007792207792207792 +-161.0 NULL 1.0 NULL -0.018633540372670808 -0.007453416149068323 +-164.0 NULL 1.0 NULL -0.018292682926829267 -0.007317073170731707 +-169.0 9738774.01775148 1.0 9738774.01775148 -0.01775147928994083 -0.007100591715976331 +-17.0 NULL 1.0 NULL -0.17647058823529413 -0.07058823529411765 +-179.0 NULL 1.0 NULL -0.01675977653631285 -0.0067039106145251395 +-181.0 NULL 1.0 NULL -0.016574585635359115 -0.0066298342541436465 +-183.0 8993731.196721312 1.0 8993731.196721312 -0.01639344262295082 -0.006557377049180328 +-184.0 8944852.222826088 1.0 8944852.222826088 -0.016304347826086956 -0.006521739130434782 +-188.0 NULL 1.0 NULL -0.015957446808510637 -0.006382978723404255 +-189.0 NULL 1.0 NULL -0.015873015873015872 -0.006349206349206349 +-199.0 NULL 1.0 NULL -0.01507537688442211 -0.006030150753768844 +-20.0 NULL 1.0 NULL -0.15 -0.06 +-201.0 NULL 1.0 NULL -0.014925373134328358 -0.005970149253731343 +-21.0 8.988389314285715E7 1.0 8.988389314285715E7 -0.14285714285714285 -0.05714285714285714 +-210.0 -8876320.40952381 1.0 -8876320.40952381 -0.014285714285714285 -0.005714285714285714 +-225.0 NULL 1.0 NULL -0.013333333333333334 -0.005333333333333333 +-228.0 8278779.631578947 1.0 8278779.631578947 -0.013157894736842105 -0.005263157894736842 +-229.0 7187130.170305677 1.0 
7187130.170305677 -0.013100436681222707 -0.005240174672489083 +-236.0 NULL 1.0 NULL -0.012711864406779662 -0.005084745762711864 +-237.0 NULL 1.0 NULL -0.012658227848101266 -0.005063291139240506 +-246.0 NULL 1.0 NULL -0.012195121951219513 -0.004878048780487805 +-247.0 -7546669.174089069 1.0 -7546669.174089069 -0.012145748987854251 -0.004858299595141701 +-247.0 NULL 1.0 NULL -0.012145748987854251 -0.004858299595141701 +-250.0 6583411.236 1.0 6583411.236 -0.012 -0.0048 +-257.0 6404096.53307393 1.0 6404096.53307393 -0.011673151750972763 -0.004669260700389105 +-273.0 6028764.868131869 1.0 6028764.868131869 -0.01098901098901099 -0.004395604395604396 +-274.0 6888911.518248175 1.0 6888911.518248175 -0.010948905109489052 -0.00437956204379562 +-279.0 NULL 1.0 NULL -0.010752688172043012 -0.004301075268817204 -28.0 5.878045746428572E7 1.0 5.878045746428572E7 -0.10714285714285714 -0.04285714285714286 -28.0 6.741291985714285E7 1.0 6.741291985714285E7 -0.10714285714285714 -0.04285714285714286 --21.0 8.988389314285715E7 1.0 8.988389314285715E7 -0.14285714285714285 -0.05714285714285714 --20.0 NULL 1.0 NULL -0.15 -0.06 --17.0 NULL 1.0 NULL -0.17647058823529413 -0.07058823529411765 --12.0 -1.5533560716666666E8 1.0 -1.5533560716666666E8 -0.25 -0.09999999999999999 +-281.0 NULL 1.0 NULL -0.010676156583629894 -0.004270462633451957 +-289.0 NULL 1.0 NULL -0.010380622837370242 -0.004152249134948096 +-290.0 NULL 1.0 NULL -0.010344827586206896 -0.004137931034482759 +-292.0 NULL 1.0 NULL -0.010273972602739725 -0.00410958904109589 -3.0 NULL 1.0 NULL -1.0 -0.39999999999999997 +-38.0 4.3311916026315786E7 1.0 4.3311916026315786E7 -0.07894736842105263 -0.031578947368421054 +-46.0 3.577940889130435E7 1.0 3.577940889130435E7 -0.06521739130434782 -0.02608695652173913 +-49.0 3.35888328367347E7 1.0 3.35888328367347E7 -0.061224489795918366 -0.024489795918367346 +-57.0 -3.27022330877193E7 1.0 -3.27022330877193E7 -0.05263157894736842 -0.021052631578947368 +-60.0 NULL 1.0 NULL -0.05 -0.02 +-62.0 3.0444544451612905E7 1.0 3.0444544451612905E7 -0.04838709677419355 -0.01935483870967742 +-62.0 NULL 1.0 NULL -0.04838709677419355 -0.01935483870967742 +-69.0 2.735596747826087E7 1.0 2.735596747826087E7 -0.043478260869565216 -0.017391304347826087 +-77.0 2.4513789038961038E7 1.0 2.4513789038961038E7 -0.03896103896103896 -0.015584415584415584 +-93.0 NULL 1.0 NULL -0.03225806451612903 -0.012903225806451613 +-94.0 -1.9830077510638297E7 1.0 -1.9830077510638297E7 -0.031914893617021274 -0.01276595744680851 +-96.0 NULL 1.0 NULL -0.03125 -0.012499999999999999 0.0 NULL NULL NULL NULL NULL 0.0 NULL NULL NULL NULL NULL 0.0 NULL NULL NULL NULL NULL @@ -585,69 +629,89 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 2:int, val 500000000), FilterDoubleColGreaterDoubleScalar(col 5:double, val 1.0E9), FilterLongColEqualLongScalar(col 0:tinyint, val 0)) - predicate: ((cdouble > 1.0E9) or (cint > 500000000) or (ctinyint = 0)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator 
Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 220184 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 2:int, val 500000000), FilterDoubleColGreaterDoubleScalar(col 5:double, val 1.0E9), FilterLongColEqualLongScalar(col 0:tinyint, val 0)) + predicate: ((cdouble > 1.0E9) or (cint > 500000000) or (ctinyint = 0)) (type: boolean) + Statistics: Num rows: 4193 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int), cbigint (type: bigint), ctinyint (type: tinyint), (cint / (cint - 528534767)) (type: double), (cbigint / (cbigint - 1018195815)) (type: double), (ctinyint / ctinyint) (type: double), (cint % (cint - 528534767)) (type: int), (cbigint % (cbigint - 1018195815)) (type: bigint), (ctinyint % ctinyint) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 0, 14, 15, 16, 17, 18, 13] + selectExpressions: LongColDivideLongColumn(col 2:int, col 13:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 13:int) -> 14:double, LongColDivideLongColumn(col 3:bigint, col 13:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 13:bigint) -> 15:double, LongColDivideLongColumn(col 0:tinyint, col 0:tinyint) -> 16:double, LongColModuloLongColumn(col 2:int, col 13:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 13:int) -> 17:int, LongColModuloLongColumn(col 3:bigint, col 13:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 13:bigint) -> 18:bigint, LongColModuloLongColumn(col 0:tinyint, col 0:tinyint) -> 13:tinyint + Statistics: Num rows: 4193 Data size: 217816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: double), _col4 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4193 Data size: 217816 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: tinyint), _col5 (type: double), _col6 (type: int), _col7 (type: bigint), _col8 (type: tinyint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: Select Operator - expressions: cint (type: int), cbigint (type: bigint), ctinyint (type: 
tinyint), (cint / (cint - 528534767)) (type: double), (cbigint / (cbigint - 1018195815)) (type: double), (ctinyint / ctinyint) (type: double), (cint % (cint - 528534767)) (type: int), (cbigint % (cbigint - 1018195815)) (type: bigint), (ctinyint % ctinyint) (type: tinyint) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: bigint), VALUE._col6 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [2, 3, 0, 14, 15, 16, 17, 18, 13] - selectExpressions: LongColDivideLongColumn(col 2:int, col 13:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 13:int) -> 14:double, LongColDivideLongColumn(col 3:bigint, col 13:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 13:bigint) -> 15:double, LongColDivideLongColumn(col 0:tinyint, col 0:tinyint) -> 16:double, LongColModuloLongColumn(col 2:int, col 13:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 13:int) -> 17:int, LongColModuloLongColumn(col 3:bigint, col 13:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 13:bigint) -> 18:bigint, LongColModuloLongColumn(col 0:tinyint, col 0:tinyint) -> 13:tinyint - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: double), _col4 (type: double) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: tinyint), _col5 (type: double), _col6 (type: int), _col7 (type: bigint), _col8 (type: tinyint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: bigint), VALUE._col6 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 21500 Basic 
stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + projectedOutputColumnNums: [2, 3, 4, 0, 1, 5, 6, 7, 8] + Statistics: Num rows: 4193 Data size: 217816 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 5216 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 5216 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -665,103 +729,103 @@ from alltypesorc where cint > 500000000 or cdouble > 1000000000 or ctinyint = 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -528534767 NULL -47 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -1 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -12 NULL NULL 1.0 NULL NULL 0 528534767 NULL -13 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 9 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -19 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 26 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 0 NULL NULL NULL NULL NULL NULL -528534767 NULL -12 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -1 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -43 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -37 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 2 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 10 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 13 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -24 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 41 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 43 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 -528534767 NULL NULL NULL NULL NULL NULL NULL NULL -528534767 NULL 27 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -53 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 528534767 NULL -30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -32 NULL NULL 1.0 NULL NULL 0 528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -21 NULL NULL 1.0 NULL 
NULL 0 -528534767 NULL 0 NULL NULL NULL NULL NULL NULL -528534767 NULL -59 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 21 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -40 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -36 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 30 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 18 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 19 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 5 NULL NULL 1.0 NULL NULL 0 528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -19 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -36 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -37 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -40 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -43 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -44 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -47 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -50 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -53 NULL NULL 1.0 NULL NULL 0 528534767 NULL -54 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 46 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 28 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 528534767 NULL -57 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -59 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -62 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -7 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 0 NULL NULL NULL NULL NULL NULL +528534767 NULL 0 NULL NULL NULL NULL NULL NULL +528534767 NULL 10 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 13 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 16 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 18 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 19 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 2 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 21 NULL NULL 1.0 NULL NULL 0 528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 -NULL 1018195815 0 NULL NULL NULL NULL NULL NULL -528534767 NULL -44 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -24 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -7 NULL NULL 1.0 NULL 
NULL 0 -528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -32 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 26 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 27 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 33 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 528534767 NULL 39 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 4 NULL NULL 1.0 NULL NULL 0 528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -62 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -51 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 62 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 16 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 41 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 43 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 46 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 62 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 9 NULL NULL 1.0 NULL NULL 0 528534767 NULL NULL NULL NULL NULL NULL NULL NULL -528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 33 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -50 NULL NULL 1.0 NULL NULL 0 +528534767 NULL NULL NULL NULL NULL NULL NULL NULL +NULL 1018195815 0 NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out new file mode 100644 index 0000000..c8959ef --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -0,0 +1,943 @@ +WARNING: Comparing a bigint and a double may result in a loss of precision. 
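A note on the WARNING line above: with `cbigint < cdouble`, Hive promotes the bigint side to double (the plan below shows the predicate as `UDFToDouble(cbigint) < cdouble`). An IEEE 754 double carries only a 53-bit significand, so distinct long values above 2^53 can collapse onto the same double and compare equal after the cast — exactly the precision loss the warning describes. A minimal, self-contained Java illustration (not Hive code):

    public class BigintDoublePrecision {
        public static void main(String[] args) {
            // 2^53 is the largest power of two up to which a double can
            // represent every long exactly; 2^53 + 1 is not representable
            // as a double and rounds back down to 2^53.
            long a = 1L << 53;   // 9007199254740992
            long b = a + 1;      // 9007199254740993
            System.out.println(a == b);                    // false
            System.out.println((double) a == (double) b);  // true
        }
    }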
+PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cbigint (type: bigint), cdouble (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 7 + Processor Tree: + ListSink + +WARNING: Comparing a bigint and a double may result in a loss of precision. 
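Note how LIMIT is handled across these plans. The limit-7 query above vectorizes as a map-only plan: with no ORDER BY, rows stream from the scan through the filter and the map-side Limit, and the Fetch Operator stops after 7 rows. The ORDER BY ... LIMIT plans that follow instead add a reducer whose Limit appears as a native VectorLimitOperator, while the map-side Reduce Sink reports "TopN Hash Memory Usage", i.e. top-N pruning before the shuffle. The core idea of a vectorized limit is simply truncating batch.size rather than testing rows one at a time; a rough sketch of that idea under assumed names (class, fields, and methods here are illustrative, not Hive's actual implementation, which also has to account for batch.selectedInUse):

    // Sketch: forward whole batches until the limit is reached,
    // truncating the final batch instead of checking each row.
    public final class VectorLimitSketch {
        private final int limit;
        private int forwarded = 0;

        public VectorLimitSketch(int limit) { this.limit = limit; }

        /** Returns how many rows of this batch to forward. */
        public int process(int batchSize) {
            if (forwarded >= limit) {
                return 0;                              // drop entire batch
            }
            int allowed = Math.min(batchSize, limit - forwarded);
            forwarded += allowed;                      // count forwarded rows
            return allowed;                            // caller sets batch.size
        }

        public static void main(String[] args) {
            VectorLimitSketch limit = new VectorLimitSketch(7);
            System.out.println(limit.process(5));      // 5
            System.out.println(limit.process(5));      // 2 (truncated)
            System.out.println(limit.process(5));      // 0 (limit reached)
        }
    }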
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-1887561756 -10011.0 +-1887561756 -13877.0 +-1887561756 -2281.0 +-1887561756 -8881.0 +-1887561756 10361.0 +-1887561756 1839.0 +-1887561756 9531.0 +PREHOOK: query: explain vectorization detail +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:tinyint) + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5, 1] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 5] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: smallint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 5] + dataColumns: ctinyint:tinyint, 
csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-64 -10462.0 -10462 +-64 -15920.0 -15920 +-64 -1600.0 -1600 +-64 -200.0 -200 +-64 -2919.0 -2919 +-64 -3097.0 -3097 +-64 -3586.0 -3586 +-64 -4018.0 -4018 +-64 -4040.0 -4040 +-64 -4803.0 -4803 +-64 -6907.0 -6907 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -8080.0 -8080 +-64 -9842.0 -9842 +PREHOOK: query: explain vectorization detail +select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan 
Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 13] + selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: avg(_col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDouble(col 13:double) -> struct + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [1] + Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: struct) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [double] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:tinyint, VALUE._col0:struct + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 1:struct) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-46 3033.55 +-47 -574.6428571428571 +-48 1672.909090909091 +-49 768.7659574468086 +-50 -960.0192307692307 +-51 -96.46341463414635 +-52 2810.705882352941 +-53 -532.7567567567568 +-54 2712.7272727272725 +-55 2385.595744680851 +-56 2595.818181818182 +-57 1867.0535714285713 +-58 3483.2444444444445 +-59 318.27272727272725 +-60 1071.82 +-61 914.3404255319149 +-62 245.69387755102042 +-63 2178.7272727272725 +-64 373.52941176470586 +NULL 9370.0945309795 +PREHOOK: query: explain vectorization detail +select distinct(ctinyint) from alltypesorc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select distinct(ctinyint) from alltypesorc limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-46 +-47 +-48 +-49 +-50 +-51 +-52 +-53 +-54 +-55 +-56 +-57 +-58 +-59 +-60 +-61 +-62 +-63 +-64 +NULL +PREHOOK: query: explain vectorization detail +select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint), cdouble (type: double) + outputColumnNames: ctinyint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5] + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint, col 5:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint), cdouble (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [] + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:tinyint, KEY._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + 
keyExpressions: col 0:tinyint, col 1:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1:double) -> bigint + className: VectorGroupByOperator + groupByMode: COMPLETE + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] + keys: _col0 (type: tinyint) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-46 24 +-47 22 +-48 29 +-49 26 +-50 30 +-51 21 +-52 33 +-53 22 +-54 26 +-55 29 +-56 36 +-57 35 +-58 23 +-59 31 +-60 27 +-61 25 +-62 27 +-63 19 +-64 24 +NULL 2932 +PREHOOK: query: explain vectorization detail +select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +PREHOOK: query: explain vectorization detail +select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:tinyint) + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 5:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: cdouble (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:double, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: 
col 0:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint), _col0 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-10462.0 -64 +-1121.0 -89 +-11322.0 -101 +-11492.0 -78 +-15920.0 -64 +-4803.0 -64 +-6907.0 -64 +-7196.0 -2009 +-8080.0 -64 +-8118.0 -80 +-9842.0 -64 +10496.0 -67 +15601.0 -1733 +3520.0 -86 +4811.0 -115 +5241.0 -80 +557.0 -75 +7705.0 -88 +9452.0 -76 +NULL -32768 diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out index bca2d2a..f321770 100644 --- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out @@ -1,3 +1,126 @@ +PREHOOK: 
query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: abs(ctinyint) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: FuncAbsLongToLong(col 0:tinyint) -> 13:int + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 13:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + 
Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out new file mode 100644 index 0000000..2b5a21e --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out @@ -0,0 +1,684 @@ +PREHOOK: query: DROP TABLE IF EXISTS parquet_types_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS parquet_types_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map<string, varchar(3)>, + l1 array<int>, + st1 struct<c1:int, c2:char(1)>, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map<string, varchar(3)>, + l1 array<int>, + st1 struct<c1:int, c2:char(1)>, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: DROP TABLE IF EXISTS parquet_project_test +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS parquet_project_test +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_project_test( +cint int, +m1 map<string, string> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: CREATE TABLE parquet_project_test( +cint int, +m1 map<string, string> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@parquet_project_test +PREHOOK: query: insert into parquet_project_test +select ctinyint, map("color","red") from parquet_types_staging +where ctinyint = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: insert into parquet_project_test +select ctinyint, map("color","red") from parquet_types_staging +where ctinyint = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_project_test +POSTHOOK: Lineage: parquet_project_test.cint EXPRESSION [] +POSTHOOK: Lineage: parquet_project_test.m1 EXPRESSION [] +PREHOOK: query: insert into parquet_project_test +select ctinyint, map("color","green") from parquet_types_staging +where ctinyint = 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: insert into parquet_project_test +select ctinyint, map("color","green") from parquet_types_staging +where ctinyint = 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_project_test +POSTHOOK: Lineage: parquet_project_test.cint EXPRESSION [] +POSTHOOK: Lineage: parquet_project_test.m1 EXPRESSION [] +PREHOOK: query: insert into parquet_project_test +select ctinyint, map("color","blue") from parquet_types_staging +where ctinyint = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: insert into parquet_project_test +select ctinyint, map("color","blue") from parquet_types_staging +where ctinyint = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_project_test +POSTHOOK: Lineage: parquet_project_test.cint EXPRESSION [] +POSTHOOK: Lineage: parquet_project_test.m1 EXPRESSION [] +PREHOOK: query: explain vectorization select * from parquet_project_test +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select * from parquet_project_test +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_project_test + Statistics: Num rows: 22 Data size: 20328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), m1 (type: map<string,string>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 20328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 20328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type MAP not enabled (map) since hive.vectorized.complex.types.enabled IS false + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: select * from parquet_project_test +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +POSTHOOK: query: select * from parquet_project_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +1 {"color":"red"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +2 {"color":"green"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +3 {"color":"blue"} +PREHOOK: query: explain vectorization select count(*) from parquet_project_test +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select count(*) from parquet_project_test +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_project_test + Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from parquet_project_test +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from parquet_project_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_project_test +#### A masked pattern was 
here #### +22 +PREHOOK: query: explain vectorization select cint, count(*) from parquet_project_test +group by cint +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select cint, count(*) from parquet_project_test +group by cint +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_project_test + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cint (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint, count(*) from parquet_project_test +group by cint +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +POSTHOOK: query: select cint, count(*) from parquet_project_test +group by cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +1 8 +2 7 +3 7 +PREHOOK: query: explain vectorization select m1["color"], count(*) from parquet_project_test +group by m1["color"] +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select m1["color"], count(*) from parquet_project_test +group by m1["color"] +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_project_test + Statistics: Num rows: 22 Data size: 20240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: m1['color'] (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 20240 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 20240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 22 Data size: 20240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type MAP not enabled (map) since hive.vectorized.complex.types.enabled IS false + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 10120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 10120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select m1["color"], count(*) from parquet_project_test +group by m1["color"] +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +POSTHOOK: query: select m1["color"], count(*) from parquet_project_test +group by m1["color"] +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_project_test +#### A masked pattern was here #### +blue 7 +green 7 +red 8 +PREHOOK: query: create table if not exists parquet_nullsplit(key string, val string) partitioned by (len string) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_nullsplit +POSTHOOK: query: create table if not exists parquet_nullsplit(key string, val string) partitioned by (len string) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_nullsplit +PREHOOK: query: insert into table parquet_nullsplit partition(len='1') +values ('one', 'red') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@parquet_nullsplit@len=1 +POSTHOOK: query: insert into table parquet_nullsplit partition(len='1') 
+values ('one', 'red') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@parquet_nullsplit@len=1 +POSTHOOK: Lineage: parquet_nullsplit PARTITION(len=1).key SCRIPT [] +POSTHOOK: Lineage: parquet_nullsplit PARTITION(len=1).val SCRIPT [] +PREHOOK: query: explain vectorization select count(*) from parquet_nullsplit where len = '1' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select count(*) from parquet_nullsplit where len = '1' +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_nullsplit + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from parquet_nullsplit where len = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_nullsplit +PREHOOK: Input: default@parquet_nullsplit@len=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from parquet_nullsplit where len = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_nullsplit +POSTHOOK: Input: default@parquet_nullsplit@len=1 +#### A masked pattern was here #### +1 +PREHOOK: query: explain vectorization select count(*) from parquet_nullsplit where len = '99' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization select count(*) from parquet_nullsplit where len = '99' +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_nullsplit + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (len = '99') (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: unknown + Map Vectorization: + enabled: true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from parquet_nullsplit where len = '99' +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_nullsplit +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from parquet_nullsplit where len = '99' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_nullsplit +#### A masked pattern was here #### +0 +PREHOOK: query: drop table parquet_nullsplit +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_nullsplit +PREHOOK: Output: default@parquet_nullsplit +POSTHOOK: query: drop table parquet_nullsplit +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_nullsplit +POSTHOOK: Output: default@parquet_nullsplit +PREHOOK: query: drop table parquet_project_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_project_test +PREHOOK: Output: default@parquet_project_test +POSTHOOK: query: drop table parquet_project_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_project_test +POSTHOOK: Output: default@parquet_project_test +PREHOOK: query: drop table parquet_types_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: drop table parquet_types_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_types_staging diff --git 
ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index f56d9ce..ed17e5c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -54,6 +54,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -91,6 +92,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -140,7 +147,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -158,7 +165,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -196,6 +203,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -233,6 +241,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -240,13 +254,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization 
detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -275,6 +289,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -300,8 +315,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -315,14 +332,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -367,14 +397,14 @@ from alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -5110 4607 -PREHOOK: query: explain vectorization expression +4086 3583 +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -403,6 +433,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: 
int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -428,8 +459,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -443,14 +476,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -514,10 +560,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -540,6 +586,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -570,6 +617,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -588,10 +641,10 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -614,6 +667,7 @@ STAGE PLANS: Statistics: 
Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -644,6 +698,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -662,10 +722,10 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -688,6 +748,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -718,6 +779,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -754,10 +821,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -780,6 +847,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -810,6 +878,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -828,10 +902,10 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ 
-854,6 +928,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -884,6 +959,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -902,10 +983,10 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -928,6 +1009,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 @@ -958,6 +1040,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -976,3 +1064,227 @@ POSTHOOK: Input: default@test_2 NULL NULL 4 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case 
when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 248718130534 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: 
query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out new file mode 100644 index 0000000..673e607 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: create temporary table x (a int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: create temporary table x (a int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +PREHOOK: query: create temporary table y (b int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@y +POSTHOOK: query: create temporary table y (b int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@y +PREHOOK: query: insert into x values(1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@x +POSTHOOK: query: insert into x values(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@x +POSTHOOK: Lineage: x.a SCRIPT [] +PREHOOK: query: insert into y values(1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@y +POSTHOOK: query: insert into y values(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@y +POSTHOOK: Lineage: y.b SCRIPT [] +PREHOOK: query: explain vectorization expression +select count(1) from x, y where a = b +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select count(1) from x, y where a = b +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + 
Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: b is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: b (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from x, y where a = b +PREHOOK: type: QUERY +PREHOOK: Input: default@x +PREHOOK: Input: default@y +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from x, y where a = b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@x +POSTHOOK: Input: default@y +#### A masked pattern was here #### +1 diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 1ae77b4..75397d7 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -12,6 +12,18 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc. 
POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +PREHOOK: query: insert into decimal_date_test values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_date_test +POSTHOOK: query: insert into decimal_date_test values (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_date_test +POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [] +POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_date_test.cdouble EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate @@ -35,7 +47,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -44,7 +56,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColumnInList(col 3:date, values [-67, -171]) predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 @@ -52,7 +64,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + @@ -60,7 +72,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -87,13 +99,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 1233808 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -128,7 +140,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -137,13 +149,13 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 5:boolean) predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE Select Operator Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -235,7 +247,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -244,7 +256,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 @@ -252,7 +264,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + @@ -260,7 +272,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -287,13 +299,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: 
VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -328,7 +340,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -337,13 +349,13 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean) predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE Select Operator Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -435,7 +447,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -444,7 +456,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColumnBetween(col 3:date, left -2, right 1) predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 @@ -452,7 +464,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3] - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + @@ -460,7 +472,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -487,13 +499,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE Column stats: NONE File Output Operator 
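Annotation on the date predicates in the plans above and below: vectorized date columns are carried as long values counting days since the epoch, which is why `(cdate) IN (1969-10-26, 1969-07-14)` compiles to `FilterLongColumnInList(col 3:date, values [-67, -171])` and `cdate BETWEEN 1969-12-30 AND 1970-01-02` to `FilterLongColumnBetween(col 3:date, left -2, right 1)`. A minimal sketch verifying those constants with `java.time` (illustrative only; Hive computes them in its own date utilities):

    import java.time.LocalDate;

    public class DateAsEpochDay {
        public static void main(String[] args) {
            // Days since 1970-01-01 match the literals in the vectorized filters.
            System.out.println(LocalDate.parse("1969-12-30").toEpochDay()); // -2
            System.out.println(LocalDate.parse("1970-01-02").toEpochDay()); // 1
            System.out.println(LocalDate.parse("1969-10-26").toEpochDay()); // -67
            System.out.println(LocalDate.parse("1969-07-14").toEpochDay()); // -171
        }
    }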
compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -528,7 +540,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -537,7 +549,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColumnNotBetween(col 3:date, left -610, right 608) predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) - Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10924 Data size: 2193525 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 @@ -545,7 +557,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3] - Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10924 Data size: 2193525 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + @@ -553,7 +565,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10924 Data size: 2193525 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -580,13 +592,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10924 Data size: 2193525 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10924 Data size: 2193525 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -621,7 +633,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -630,7 +642,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColumnBetween(col 1:decimal(20,10), left -20, right 45.9918918919) predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean) - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 @@ -638,7 +650,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + @@ -646,7 +658,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -673,13 +685,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1365 Data size: 274112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 274090 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -714,7 +726,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -723,13 +735,13 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColumnNotBetween(col 1:decimal(20,10), left -2000, right 4390.1351351351) predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) - Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10924 Data size: 2193525 Basic stats: COMPLETE Column stats: NONE Select Operator Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [] - Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10924 Data size: 2193525 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -1067,7 +1079,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -1078,7 +1090,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5] selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 5:boolean - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: 
count() Group By Vectorization: @@ -1092,7 +1104,7 @@ STAGE PLANS: keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -1101,7 +1113,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -1135,7 +1147,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -1143,7 +1155,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized @@ -1161,13 +1173,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1203,7 +1215,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -1214,7 +1226,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5] selectExpressions: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -1228,7 +1240,7 @@ STAGE PLANS: keys: _col0 (type: boolean) mode: 
hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -1237,7 +1249,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -1271,7 +1283,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -1279,7 +1291,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized @@ -1297,13 +1309,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1339,7 +1351,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -1350,7 +1362,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5] selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 5:boolean - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -1364,7 +1376,7 @@ STAGE PLANS: keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -1373,7 +1385,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -1407,7 +1419,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -1415,7 +1427,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized @@ -1433,13 +1445,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1475,7 +1487,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -1486,7 +1498,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5] selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 5:boolean - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -1500,7 +1512,7 @@ STAGE PLANS: keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) 
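A note on the result deltas that follow (NULL 6230 -> 6231 and NULL 3114 -> 3115): under SQL three-valued logic, `cdate IN (...)` and `cdecimal1 BETWEEN ...` evaluate to NULL, not false, when their input is NULL, so the newly inserted all-NULL row lands in the NULL bucket of every `GROUP BY c0` probe. A minimal Java sketch of that rule, with a simplified `sqlIn` helper standing in for Hive's real IN implementation (which also handles NULLs inside the value list):

    public class ThreeValuedIn {
        // Simplified "v IN (list)" under SQL three-valued logic: a NULL
        // input yields NULL (not false), so GROUP BY routes the row to
        // the NULL group.
        static Boolean sqlIn(Long v, long... list) {
            if (v == null) {
                return null;
            }
            for (long x : list) {
                if (x == v) {
                    return true;
                }
            }
            return false;
        }

        public static void main(String[] args) {
            System.out.println(sqlIn(null, -67, -171)); // null -> NULL group
            System.out.println(sqlIn(-67L, -67, -171)); // true
            System.out.println(sqlIn(5L, -67, -171));   // false
        }
    }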
sort order: + @@ -1509,7 +1521,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -1543,7 +1555,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -1551,7 +1563,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized @@ -1569,13 +1581,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1595,7 +1607,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -NULL 6230 +NULL 6231 false 6041 true 17 PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 @@ -1606,7 +1618,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -NULL 3114 +NULL 3115 false 9165 true 9 PREHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 @@ -1617,7 +1629,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-3 POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -NULL 6230 +NULL 6231 false 5974 true 84 PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 
ORDER BY c0 @@ -1628,7 +1640,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AN POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -NULL 3114 +NULL 3115 false 3002 true 6172 PREHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 @@ -1639,7 +1651,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -NULL 6230 +NULL 6231 false 6041 true 17 PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 @@ -1650,7 +1662,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -NULL 3114 +NULL 3115 false 9165 true 9 PREHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 @@ -1661,7 +1673,7 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-3 POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -NULL 6230 +NULL 6231 false 5974 true 84 PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 ORDER BY c0 @@ -1672,6 +1684,6 @@ POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AN POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -NULL 3114 +NULL 3115 false 3002 true 6172 diff --git ql/src/test/results/clientpositive/spark/vector_data_types.q.out ql/src/test/results/clientpositive/spark/vector_data_types.q.out index b5882ee..4561e2d 100644 --- ql/src/test/results/clientpositive/spark/vector_data_types.q.out +++ ql/src/test/results/clientpositive/spark/vector_data_types.q.out @@ -95,6 +95,25 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] +PREHOOK: query: insert into over1korc values (NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@over1korc +POSTHOOK: query: insert into over1korc values (NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@over1korc +POSTHOOK: Lineage: over1korc.b EXPRESSION [] +POSTHOOK: Lineage: over1korc.bin EXPRESSION [] +POSTHOOK: Lineage: over1korc.bo EXPRESSION [] +POSTHOOK: Lineage: over1korc.d EXPRESSION [] +POSTHOOK: Lineage: over1korc.dec EXPRESSION [] +POSTHOOK: Lineage: over1korc.f EXPRESSION [] +POSTHOOK: Lineage: over1korc.i EXPRESSION [] +POSTHOOK: Lineage: over1korc.s EXPRESSION [] +POSTHOOK: Lineage: over1korc.si EXPRESSION [] +POSTHOOK: Lineage: over1korc.t EXPRESSION [] +POSTHOOK: 
Lineage: over1korc.ts EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i LIMIT 20 @@ -118,15 +137,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Reducer 2 @@ -134,7 +153,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE @@ -160,6 +179,7 @@ POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 374 65560 4294967516 65.43 22.48 true oscar quirinius 2013-03-01 09:11:58.703316 16.86 mathematics NULL 409 65536 4294967490 46.97 25.92 false fred miller 2013-03-01 09:11:58.703116 33.45 history NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703111 18.80 mathematics @@ -179,7 +199,6 @@ NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703 -2 461 65648 4294967425 58.52 24.85 false rachel thompson 2013-03-01 09:11:58.703318 85.62 zync studies -1 268 65778 4294967418 56.33 44.73 true calvin falkner 2013-03-01 09:11:58.70322 7.37 history -1 281 65643 4294967323 15.1 45.0 false irene nixon 2013-03-01 09:11:58.703223 80.96 undecided --1 300 65663 4294967343 71.26 34.62 true calvin ovid 2013-03-01 09:11:58.703262 78.56 study skills PREHOOK: query: SELECT SUM(HASH(*)) FROM (SELECT t, si, i, b, f, d, bo, s, ts, 
`dec`, bin FROM over1korc ORDER BY t, si, i) as q PREHOOK: type: QUERY @@ -214,7 +233,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -224,7 +243,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ @@ -232,7 +251,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized @@ -261,7 +280,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Limit Vectorization: @@ -293,6 +312,7 @@ POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 374 65560 4294967516 65.43 22.48 true oscar quirinius 2013-03-01 09:11:58.703316 16.86 mathematics NULL 409 65536 4294967490 46.97 25.92 false fred miller 2013-03-01 09:11:58.703116 33.45 history NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703111 18.80 mathematics @@ -312,7 +332,113 @@ NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703 -2 461 65648 4294967425 58.52 24.85 false rachel thompson 2013-03-01 09:11:58.703318 85.62 zync studies -1 268 65778 4294967418 56.33 44.73 true calvin falkner 2013-03-01 09:11:58.70322 7.37 history -1 281 65643 4294967323 15.1 45.0 false irene nixon 2013-03-01 09:11:58.703223 80.96 undecided --1 300 65663 4294967343 71.26 34.62 true calvin ovid 2013-03-01 09:11:58.703262 78.56 study skills +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT SUM(HASH(*)) +FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT SUM(HASH(*)) +FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 
depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: hash(t,si,i,b,f,d,bo,s,ts,dec,bin) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [12] + selectExpressions: VectorUDFAdaptor(hash(t,si,i,b,f,d,bo,s,ts,dec,bin)) -> 12:int + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 12:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(HASH(*)) FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index c913871..9880bd7 100644 --- 
ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -20,6 +20,18 @@ POSTHOOK: Lineage: decimal_vgby.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_vgby.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into decimal_vgby values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_vgby +POSTHOOK: query: insert into decimal_vgby values (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_vgby +POSTHOOK: Lineage: decimal_vgby.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby.cdouble EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby.cint EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), @@ -55,7 +67,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct] @@ -66,7 +78,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() Group By Vectorization: @@ -80,7 +92,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -91,7 +103,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -138,14 +150,14 @@ STAGE PLANS: keys: KEY._col0 
(type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1082441 Basic stats: COMPLETE Column stats: NONE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterLongColGreaterLongScalar(col 9:bigint, val 1) predicate: (_col9 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -153,13 +165,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -232,7 +244,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct] @@ -243,7 +255,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() Group By Vectorization: @@ -257,7 +269,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -268,7 +280,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -315,14 +327,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1082441 Basic stats: COMPLETE Column stats: NONE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterLongColGreaterLongScalar(col 15:bigint, val 1) predicate: (_col15 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -330,13 +342,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -396,6 +408,18 @@ POSTHOOK: Lineage: decimal_vgby_small.cdecimal1 EXPRESSION [(alltypesorc)alltype POSTHOOK: Lineage: decimal_vgby_small.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby_small.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby_small.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into decimal_vgby_small values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_vgby_small +POSTHOOK: query: insert into decimal_vgby_small values 
(NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_vgby_small +POSTHOOK: Lineage: decimal_vgby_small.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby_small.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby_small.cdouble EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby_small.cint EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), @@ -431,7 +455,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby_small - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5)/DECIMAL_64, 2:cdecimal2:decimal(16,0)/DECIMAL_64, 3:cint:int, 4:ROW__ID:struct] @@ -442,7 +466,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() Group By Vectorization: @@ -456,7 +480,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -467,7 +491,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -514,14 +538,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 6144 Data size: 173230 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 173221 Basic stats: COMPLETE Column stats: NONE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterLongColGreaterLongScalar(col 9:bigint, val 1) predicate: (_col9 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: 
decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -529,13 +553,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -573,6 +597,25 @@ POSTHOOK: Input: default@decimal_vgby_small 6981 2 -515.62107 -515.62107 -1031.24214 3 6984454 -618 6983218 762 1 1531.21941 1531.21941 1531.21941 2 6984454 1834 6986288 NULL 3072 9318.43514 -4298.15135 5018444.11392 3072 11161 -5148 6010880 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +-18663521580 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), @@ -608,7 +651,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby_small - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5)/DECIMAL_64, 2:cdecimal2:decimal(16,0)/DECIMAL_64, 3:cint:int, 4:ROW__ID:struct] @@ -619,7 +662,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() Group By Vectorization: @@ -633,7 +676,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator key expressions: _col0 (type: int) sort order: + @@ -644,7 +687,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -691,14 +734,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 6144 Data size: 173230 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 173221 Basic stats: COMPLETE Column stats: NONE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterLongColGreaterLongScalar(col 15:bigint, val 1) predicate: (_col15 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -706,13 +749,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -750,3 +793,22 @@ POSTHOOK: Input: default@decimal_vgby_small 6981 2 -515.62107 -515.62107 -1031.24214 -515.621070000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175 762 1 1531.21941 1531.21941 1531.21941 1531.219410000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881 NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.483083909642 5696.410309489072 
3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +91757235680 diff --git ql/src/test/results/clientpositive/spark/vectorization_div0.q.out ql/src/test/results/clientpositive/spark/vectorization_div0.q.out index 105f7c3..530bdf6 100644 --- ql/src/test/results/clientpositive/spark/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_div0.q.out @@ -290,9 +290,9 @@ from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --985319 NULL -0.000001217879691754650 --985319 2.0297994862577501E-4 -0.000001217879691754650 -63925 0.11256941728588189 -0.000018771998435666797 +-985319 2.0297994862577501E-4 -0.000001217879691754650 +-985319 NULL -0.000001217879691754650 0 NULL NULL 0 NULL NULL 0 NULL NULL @@ -302,21 +302,6 @@ POSTHOOK: Input: default@alltypesorc 0 NULL NULL 0 NULL NULL 0 NULL NULL -392309 NULL 0.000003058813333367320 -673083 -0.010691103474608629 0.000001782841046349410 -2331159 NULL 0.000000514765402102559 -2342037 NULL 0.000000512374484263058 -3533105 -5.660743170667161E-5 0.000000339644590240030 -3768727 0.004139594085748318 0.000000318409903397089 -4728619 NULL 0.000000253773881972728 -5391403 NULL 0.000000222576572369010 -7022666 -0.0010246820794268159 0.000000170875277280736 -7470430 NULL 0.000000160633323650714 -8276429 NULL 0.000000144990067576246 -8286860 -8.683626850218298E-4 0.000000144807562816314 -8299981 -8.669899364829872E-4 0.000000144578644216174 -9247593 NULL 0.000000129763496295739 -9821695 -7.326637611939691E-4 0.000000122178503812224 10000738 0.001559984873116364 0.000000119991144653525 10081828 0.0015474376273826532 0.000000119026033770860 10745355 -6.696847149303117E-4 0.000000111676161466978 @@ -336,6 +321,8 @@ POSTHOOK: Input: default@alltypesorc 20165679 7.736411950224934E-4 0.000000059507046601307 20547875 NULL 0.000000058400199534015 23264783 NULL 0.000000051580107151655 +2331159 NULL 0.000000514765402102559 +2342037 NULL 0.000000512374484263058 23475527 6.645644206411213E-4 0.000000051117063314489 24379905 NULL 0.000000049220864478348 24514624 -2.935390728407664E-4 0.000000048950373458716 @@ -351,12 +338,15 @@ POSTHOOK: Input: default@alltypesorc 33126539 NULL 0.000000036224732079617 34603086 NULL 0.000000034678987879867 35156265 NULL 0.000000034133318769784 +3533105 -5.660743170667161E-5 0.000000339644590240030 35862260 NULL 0.000000033461360215447 36123797 -1.992038655294182E-4 0.000000033219099310075 36341671 -1.980096072082101E-4 0.000000033019945615599 36413215 
-5.4925114412446145E-6 0.000000032955068647468 36578596 4.2650625518814335E-4 0.000000032806070522772 36796441 -1.955623914823719E-4 0.000000032611849607955 +3768727 0.004139594085748318 0.000000318409903397089 +392309 NULL 0.000003058813333367320 39723587 NULL 0.000000030208752296211 39985709 -1.7996429674411925E-4 0.000000030010722080731 40018606 NULL 0.000000029986051987918 @@ -369,6 +359,7 @@ POSTHOOK: Input: default@alltypesorc 45717793 3.4124569399052136E-4 0.000000026247986205283 46163162 NULL 0.000000025994753132379 46525838 3.353190543284787E-4 0.000000025792120068853 +4728619 NULL 0.000000253773881972728 48626663 NULL 0.000000024677819244969 49102701 -1.465499830650864E-4 0.000000024438574163161 50300445 -1.4306036457530346E-4 0.000000023856647789100 @@ -377,6 +368,7 @@ POSTHOOK: Input: default@alltypesorc 52667422 2.9621727070673783E-4 0.000000022784483356713 52962061 2.945693522010029E-4 0.000000022657728520044 53695172 NULL 0.000000022348377988248 +5391403 NULL 0.000000222576572369010 54760317 NULL 0.000000021913678841560 55020655 2.835480602693661E-4 0.000000021809991175132 56102034 NULL 0.000000021389598815615 @@ -390,6 +382,14 @@ POSTHOOK: Input: default@alltypesorc 59347745 NULL 0.000000020219807846111 60229567 NULL 0.000000019923769334088 60330397 NULL 0.000000019890470801974 +673083 -0.010691103474608629 0.000001782841046349410 +7022666 -0.0010246820794268159 0.000000170875277280736 +7470430 NULL 0.000000160633323650714 +8276429 NULL 0.000000144990067576246 +8286860 -8.683626850218298E-4 0.000000144807562816314 +8299981 -8.669899364829872E-4 0.000000144578644216174 +9247593 NULL 0.000000129763496295739 +9821695 -7.326637611939691E-4 0.000000122178503812224 PREHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 @@ -506,68 +506,68 @@ from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --292.0 NULL 1.0 NULL -0.010273972602739725 -0.00410958904109589 --290.0 NULL 1.0 NULL -0.010344827586206896 -0.004137931034482759 --289.0 NULL 1.0 NULL -0.010380622837370242 -0.004152249134948096 --281.0 NULL 1.0 NULL -0.010676156583629894 -0.004270462633451957 --279.0 NULL 1.0 NULL -0.010752688172043012 -0.004301075268817204 --274.0 6888911.518248175 1.0 6888911.518248175 -0.010948905109489052 -0.00437956204379562 --273.0 6028764.868131869 1.0 6028764.868131869 -0.01098901098901099 -0.004395604395604396 --257.0 6404096.53307393 1.0 6404096.53307393 -0.011673151750972763 -0.004669260700389105 --250.0 6583411.236 1.0 6583411.236 -0.012 -0.0048 --247.0 NULL 1.0 NULL -0.012145748987854251 -0.004858299595141701 --247.0 -7546669.174089069 1.0 -7546669.174089069 -0.012145748987854251 -0.004858299595141701 --246.0 NULL 1.0 NULL -0.012195121951219513 -0.004878048780487805 --237.0 NULL 1.0 NULL -0.012658227848101266 -0.005063291139240506 --236.0 NULL 1.0 NULL -0.012711864406779662 -0.005084745762711864 --229.0 7187130.170305677 1.0 7187130.170305677 -0.013100436681222707 -0.005240174672489083 --228.0 8278779.631578947 1.0 8278779.631578947 -0.013157894736842105 -0.005263157894736842 --225.0 NULL 1.0 NULL -0.013333333333333334 -0.005333333333333333 --210.0 -8876320.40952381 1.0 -8876320.40952381 -0.014285714285714285 
-0.005714285714285714 --201.0 NULL 1.0 NULL -0.014925373134328358 -0.005970149253731343 --199.0 NULL 1.0 NULL -0.01507537688442211 -0.006030150753768844 --189.0 NULL 1.0 NULL -0.015873015873015872 -0.006349206349206349 --188.0 NULL 1.0 NULL -0.015957446808510637 -0.006382978723404255 --184.0 8944852.222826088 1.0 8944852.222826088 -0.016304347826086956 -0.006521739130434782 --183.0 8993731.196721312 1.0 8993731.196721312 -0.01639344262295082 -0.006557377049180328 --181.0 NULL 1.0 NULL -0.016574585635359115 -0.0066298342541436465 --179.0 NULL 1.0 NULL -0.01675977653631285 -0.0067039106145251395 --169.0 9738774.01775148 1.0 9738774.01775148 -0.01775147928994083 -0.007100591715976331 --164.0 NULL 1.0 NULL -0.018292682926829267 -0.007317073170731707 --161.0 NULL 1.0 NULL -0.018633540372670808 -0.007453416149068323 --154.0 1.2256894519480519E7 1.0 1.2256894519480519E7 -0.01948051948051948 -0.007792207792207792 --152.0 NULL 1.0 NULL -0.019736842105263157 -0.007894736842105263 --148.0 NULL 1.0 NULL -0.02027027027027027 -0.008108108108108109 --140.0 NULL 1.0 NULL -0.02142857142857143 -0.008571428571428572 --138.0 NULL 1.0 NULL -0.021739130434782608 -0.008695652173913044 --137.0 NULL 1.0 NULL -0.021897810218978103 -0.00875912408759124 --132.0 NULL 1.0 NULL -0.022727272727272728 -0.00909090909090909 --129.0 1.2758548906976745E7 1.0 1.2758548906976745E7 -0.023255813953488372 -0.009302325581395349 --128.0 NULL 1.0 NULL -0.0234375 -0.009375 --126.0 NULL 1.0 NULL -0.023809523809523808 -0.009523809523809523 --126.0 -1.4793867349206349E7 1.0 -1.4793867349206349E7 -0.023809523809523808 -0.009523809523809523 --116.0 NULL 1.0 NULL -0.02586206896551724 -0.010344827586206896 --113.0 NULL 1.0 NULL -0.02654867256637168 -0.010619469026548672 -113.0 -1.6495816690265486E7 1.0 -1.6495816690265486E7 -0.02654867256637168 -0.010619469026548672 --96.0 NULL 1.0 NULL -0.03125 -0.012499999999999999 --94.0 -1.9830077510638297E7 1.0 -1.9830077510638297E7 -0.031914893617021274 -0.01276595744680851 --93.0 NULL 1.0 NULL -0.03225806451612903 -0.012903225806451613 --77.0 2.4513789038961038E7 1.0 2.4513789038961038E7 -0.03896103896103896 -0.015584415584415584 --69.0 2.735596747826087E7 1.0 2.735596747826087E7 -0.043478260869565216 -0.017391304347826087 --62.0 NULL 1.0 NULL -0.04838709677419355 -0.01935483870967742 --62.0 3.0444544451612905E7 1.0 3.0444544451612905E7 -0.04838709677419355 -0.01935483870967742 --60.0 NULL 1.0 NULL -0.05 -0.02 --57.0 -3.27022330877193E7 1.0 -3.27022330877193E7 -0.05263157894736842 -0.021052631578947368 --49.0 3.35888328367347E7 1.0 3.35888328367347E7 -0.061224489795918366 -0.024489795918367346 --46.0 3.577940889130435E7 1.0 3.577940889130435E7 -0.06521739130434782 -0.02608695652173913 --38.0 4.3311916026315786E7 1.0 4.3311916026315786E7 -0.07894736842105263 -0.031578947368421054 +-113.0 NULL 1.0 NULL -0.02654867256637168 -0.010619469026548672 +-116.0 NULL 1.0 NULL -0.02586206896551724 -0.010344827586206896 +-12.0 -1.5533560716666666E8 1.0 -1.5533560716666666E8 -0.25 -0.09999999999999999 +-126.0 -1.4793867349206349E7 1.0 -1.4793867349206349E7 -0.023809523809523808 -0.009523809523809523 +-126.0 NULL 1.0 NULL -0.023809523809523808 -0.009523809523809523 +-128.0 NULL 1.0 NULL -0.0234375 -0.009375 +-129.0 1.2758548906976745E7 1.0 1.2758548906976745E7 -0.023255813953488372 -0.009302325581395349 +-132.0 NULL 1.0 NULL -0.022727272727272728 -0.00909090909090909 +-137.0 NULL 1.0 NULL -0.021897810218978103 -0.00875912408759124 +-138.0 NULL 1.0 NULL -0.021739130434782608 -0.008695652173913044 +-140.0 NULL 1.0 NULL 
-0.02142857142857143 -0.008571428571428572 +-148.0 NULL 1.0 NULL -0.02027027027027027 -0.008108108108108109 +-152.0 NULL 1.0 NULL -0.019736842105263157 -0.007894736842105263 +-154.0 1.2256894519480519E7 1.0 1.2256894519480519E7 -0.01948051948051948 -0.007792207792207792 +-161.0 NULL 1.0 NULL -0.018633540372670808 -0.007453416149068323 +-164.0 NULL 1.0 NULL -0.018292682926829267 -0.007317073170731707 +-169.0 9738774.01775148 1.0 9738774.01775148 -0.01775147928994083 -0.007100591715976331 +-17.0 NULL 1.0 NULL -0.17647058823529413 -0.07058823529411765 +-179.0 NULL 1.0 NULL -0.01675977653631285 -0.0067039106145251395 +-181.0 NULL 1.0 NULL -0.016574585635359115 -0.0066298342541436465 +-183.0 8993731.196721312 1.0 8993731.196721312 -0.01639344262295082 -0.006557377049180328 +-184.0 8944852.222826088 1.0 8944852.222826088 -0.016304347826086956 -0.006521739130434782 +-188.0 NULL 1.0 NULL -0.015957446808510637 -0.006382978723404255 +-189.0 NULL 1.0 NULL -0.015873015873015872 -0.006349206349206349 +-199.0 NULL 1.0 NULL -0.01507537688442211 -0.006030150753768844 +-20.0 NULL 1.0 NULL -0.15 -0.06 +-201.0 NULL 1.0 NULL -0.014925373134328358 -0.005970149253731343 +-21.0 8.988389314285715E7 1.0 8.988389314285715E7 -0.14285714285714285 -0.05714285714285714 +-210.0 -8876320.40952381 1.0 -8876320.40952381 -0.014285714285714285 -0.005714285714285714 +-225.0 NULL 1.0 NULL -0.013333333333333334 -0.005333333333333333 +-228.0 8278779.631578947 1.0 8278779.631578947 -0.013157894736842105 -0.005263157894736842 +-229.0 7187130.170305677 1.0 7187130.170305677 -0.013100436681222707 -0.005240174672489083 +-236.0 NULL 1.0 NULL -0.012711864406779662 -0.005084745762711864 +-237.0 NULL 1.0 NULL -0.012658227848101266 -0.005063291139240506 +-246.0 NULL 1.0 NULL -0.012195121951219513 -0.004878048780487805 +-247.0 -7546669.174089069 1.0 -7546669.174089069 -0.012145748987854251 -0.004858299595141701 +-247.0 NULL 1.0 NULL -0.012145748987854251 -0.004858299595141701 +-250.0 6583411.236 1.0 6583411.236 -0.012 -0.0048 +-257.0 6404096.53307393 1.0 6404096.53307393 -0.011673151750972763 -0.004669260700389105 +-273.0 6028764.868131869 1.0 6028764.868131869 -0.01098901098901099 -0.004395604395604396 +-274.0 6888911.518248175 1.0 6888911.518248175 -0.010948905109489052 -0.00437956204379562 +-279.0 NULL 1.0 NULL -0.010752688172043012 -0.004301075268817204 -28.0 5.878045746428572E7 1.0 5.878045746428572E7 -0.10714285714285714 -0.04285714285714286 -28.0 6.741291985714285E7 1.0 6.741291985714285E7 -0.10714285714285714 -0.04285714285714286 --21.0 8.988389314285715E7 1.0 8.988389314285715E7 -0.14285714285714285 -0.05714285714285714 --20.0 NULL 1.0 NULL -0.15 -0.06 --17.0 NULL 1.0 NULL -0.17647058823529413 -0.07058823529411765 --12.0 -1.5533560716666666E8 1.0 -1.5533560716666666E8 -0.25 -0.09999999999999999 +-281.0 NULL 1.0 NULL -0.010676156583629894 -0.004270462633451957 +-289.0 NULL 1.0 NULL -0.010380622837370242 -0.004152249134948096 +-290.0 NULL 1.0 NULL -0.010344827586206896 -0.004137931034482759 +-292.0 NULL 1.0 NULL -0.010273972602739725 -0.00410958904109589 -3.0 NULL 1.0 NULL -1.0 -0.39999999999999997 +-38.0 4.3311916026315786E7 1.0 4.3311916026315786E7 -0.07894736842105263 -0.031578947368421054 +-46.0 3.577940889130435E7 1.0 3.577940889130435E7 -0.06521739130434782 -0.02608695652173913 +-49.0 3.35888328367347E7 1.0 3.35888328367347E7 -0.061224489795918366 -0.024489795918367346 +-57.0 -3.27022330877193E7 1.0 -3.27022330877193E7 -0.05263157894736842 -0.021052631578947368 +-60.0 NULL 1.0 NULL -0.05 -0.02 +-62.0 3.0444544451612905E7 1.0 
3.0444544451612905E7 -0.04838709677419355 -0.01935483870967742 +-62.0 NULL 1.0 NULL -0.04838709677419355 -0.01935483870967742 +-69.0 2.735596747826087E7 1.0 2.735596747826087E7 -0.043478260869565216 -0.017391304347826087 +-77.0 2.4513789038961038E7 1.0 2.4513789038961038E7 -0.03896103896103896 -0.015584415584415584 +-93.0 NULL 1.0 NULL -0.03225806451612903 -0.012903225806451613 +-94.0 -1.9830077510638297E7 1.0 -1.9830077510638297E7 -0.031914893617021274 -0.01276595744680851 +-96.0 NULL 1.0 NULL -0.03125 -0.012499999999999999 0.0 NULL NULL NULL NULL NULL 0.0 NULL NULL NULL NULL NULL 0.0 NULL NULL NULL NULL NULL @@ -722,103 +722,103 @@ from alltypesorc where cint > 500000000 or cdouble > 1000000000 or ctinyint = 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -528534767 NULL -50 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 33 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 -528534767 NULL NULL NULL NULL NULL NULL NULL NULL +528534767 NULL -1 NULL NULL 1.0 NULL NULL 0 528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 16 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 62 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -51 NULL NULL 1.0 NULL NULL 0 528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -62 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 39 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -32 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -7 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -24 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -44 NULL NULL 1.0 NULL NULL 0 -NULL 1018195815 0 NULL NULL NULL NULL NULL NULL -528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 4 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -57 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -12 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -13 NULL NULL 1.0 NULL NULL 0 528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 46 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -54 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 528534767 NULL -19 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 5 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 19 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 
+528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -24 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -32 NULL NULL 1.0 NULL NULL 0 528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 18 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 528534767 NULL -36 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -37 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 528534767 NULL -40 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 21 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -59 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 0 NULL NULL NULL NULL NULL NULL -528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -43 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -44 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -47 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -53 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 27 NULL NULL 1.0 NULL NULL 0 -528534767 NULL NULL NULL NULL NULL NULL NULL NULL -528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 43 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 41 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 13 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -50 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -54 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -57 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -59 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -62 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -7 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 0 NULL NULL NULL NULL NULL NULL +528534767 NULL 0 NULL NULL NULL NULL NULL NULL 528534767 NULL 10 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 13 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 16 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 18 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 19 NULL NULL 1.0 NULL NULL 0 528534767 NULL 2 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -37 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -43 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -1 NULL NULL 1.0 
NULL NULL 0 -528534767 NULL -12 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 0 NULL NULL NULL NULL NULL NULL +528534767 NULL 21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 528534767 NULL 26 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 9 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -13 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 27 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 33 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -47 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 39 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 41 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 43 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 46 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 62 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 9 NULL NULL 1.0 NULL NULL 0 +528534767 NULL NULL NULL NULL NULL NULL NULL NULL +528534767 NULL NULL NULL NULL NULL NULL NULL NULL +NULL 1018195815 0 NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out index bca2d2a..96ad3ad 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -1,3 +1,124 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: abs(ctinyint) (type: int) + 
outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: FuncAbsLongToLong(col 0:tinyint) -> 13:int + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 13:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index c1dd74c..8fff1ed 
100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -54,6 +54,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -90,6 +91,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -139,7 +146,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -157,7 +164,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -195,6 +202,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -231,6 +239,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -238,13 +252,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -272,6 +286,7 @@ STAGE 
PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -297,8 +312,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized @@ -311,14 +328,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -363,14 +393,14 @@ from alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -5110 4607 -PREHOOK: query: explain vectorization expression +4086 3583 +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -398,6 +428,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -423,8 +454,10 @@ STAGE PLANS: sort order: Reduce Sink 
Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized @@ -437,14 +470,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -508,10 +554,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -534,6 +580,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -563,6 +610,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -581,10 +634,10 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -607,6 +660,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 
1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -636,6 +690,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -654,10 +714,10 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -680,6 +740,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -709,6 +770,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -745,10 +812,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -771,6 +838,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -800,6 +868,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -818,10 +892,10 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -844,6 +918,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + 
vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -873,6 +948,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -891,10 +972,10 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -917,6 +998,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 @@ -946,6 +1028,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -964,3 +1052,227 @@ POSTHOOK: Input: default@test_2 NULL NULL 4 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +PREHOOK: type: QUERY +PREHOOK: Input: 
default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 248718130534 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 
0 end) = cint) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 diff --git ql/src/test/results/clientpositive/tez/vectorization_div0.q.out ql/src/test/results/clientpositive/tez/vectorization_div0.q.out index af87ffe..4afe85c 100644 --- ql/src/test/results/clientpositive/tez/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/tez/vectorization_div0.q.out @@ -291,9 +291,9 @@ from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### --985319 NULL -0.000001217879691754650 --985319 2.0297994862577501E-4 -0.000001217879691754650 -63925 0.11256941728588189 -0.000018771998435666797 +-985319 2.0297994862577501E-4 -0.000001217879691754650 +-985319 NULL -0.000001217879691754650 0 NULL NULL 0 NULL NULL 0 NULL NULL @@ -303,21 +303,6 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 0 NULL NULL 0 NULL NULL 0 NULL NULL -392309 NULL 0.000003058813333367320 -673083 -0.010691103474608629 0.000001782841046349410 -2331159 NULL 0.000000514765402102559 -2342037 NULL 0.000000512374484263058 -3533105 -5.660743170667161E-5 0.000000339644590240030 -3768727 0.004139594085748318 0.000000318409903397089 -4728619 NULL 0.000000253773881972728 -5391403 NULL 0.000000222576572369010 -7022666 -0.0010246820794268159 0.000000170875277280736 -7470430 NULL 0.000000160633323650714 -8276429 NULL 0.000000144990067576246 -8286860 -8.683626850218298E-4 0.000000144807562816314 -8299981 -8.669899364829872E-4 0.000000144578644216174 -9247593 NULL 0.000000129763496295739 -9821695 -7.326637611939691E-4 0.000000122178503812224 10000738 0.001559984873116364 0.000000119991144653525 10081828 0.0015474376273826532 0.000000119026033770860 10745355 -6.696847149303117E-4 0.000000111676161466978 @@ -337,6 +322,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 20165679 7.736411950224934E-4 0.000000059507046601307 20547875 NULL 0.000000058400199534015 23264783 NULL 0.000000051580107151655 +2331159 NULL 0.000000514765402102559 +2342037 NULL 0.000000512374484263058 23475527 6.645644206411213E-4 0.000000051117063314489 24379905 NULL 0.000000049220864478348 24514624 -2.935390728407664E-4 0.000000048950373458716 @@ -352,12 +339,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 33126539 NULL 0.000000036224732079617 34603086 NULL 0.000000034678987879867 35156265 NULL 
0.000000034133318769784 +3533105 -5.660743170667161E-5 0.000000339644590240030 35862260 NULL 0.000000033461360215447 36123797 -1.992038655294182E-4 0.000000033219099310075 36341671 -1.980096072082101E-4 0.000000033019945615599 36413215 -5.4925114412446145E-6 0.000000032955068647468 36578596 4.2650625518814335E-4 0.000000032806070522772 36796441 -1.955623914823719E-4 0.000000032611849607955 +3768727 0.004139594085748318 0.000000318409903397089 +392309 NULL 0.000003058813333367320 39723587 NULL 0.000000030208752296211 39985709 -1.7996429674411925E-4 0.000000030010722080731 40018606 NULL 0.000000029986051987918 @@ -370,6 +360,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 45717793 3.4124569399052136E-4 0.000000026247986205283 46163162 NULL 0.000000025994753132379 46525838 3.353190543284787E-4 0.000000025792120068853 +4728619 NULL 0.000000253773881972728 48626663 NULL 0.000000024677819244969 49102701 -1.465499830650864E-4 0.000000024438574163161 50300445 -1.4306036457530346E-4 0.000000023856647789100 @@ -378,6 +369,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 52667422 2.9621727070673783E-4 0.000000022784483356713 52962061 2.945693522010029E-4 0.000000022657728520044 53695172 NULL 0.000000022348377988248 +5391403 NULL 0.000000222576572369010 54760317 NULL 0.000000021913678841560 55020655 2.835480602693661E-4 0.000000021809991175132 56102034 NULL 0.000000021389598815615 @@ -391,6 +383,14 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 59347745 NULL 0.000000020219807846111 60229567 NULL 0.000000019923769334088 60330397 NULL 0.000000019890470801974 +673083 -0.010691103474608629 0.000001782841046349410 +7022666 -0.0010246820794268159 0.000000170875277280736 +7470430 NULL 0.000000160633323650714 +8276429 NULL 0.000000144990067576246 +8286860 -8.683626850218298E-4 0.000000144807562816314 +8299981 -8.669899364829872E-4 0.000000144578644216174 +9247593 NULL 0.000000129763496295739 +9821695 -7.326637611939691E-4 0.000000122178503812224 PREHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 @@ -508,68 +508,68 @@ from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### --292.0 NULL 1.0 NULL -0.010273972602739725 -0.00410958904109589 --290.0 NULL 1.0 NULL -0.010344827586206896 -0.004137931034482759 --289.0 NULL 1.0 NULL -0.010380622837370242 -0.004152249134948096 --281.0 NULL 1.0 NULL -0.010676156583629894 -0.004270462633451957 --279.0 NULL 1.0 NULL -0.010752688172043012 -0.004301075268817204 --274.0 6888911.518248175 1.0 6888911.518248175 -0.010948905109489052 -0.00437956204379562 --273.0 6028764.868131869 1.0 6028764.868131869 -0.01098901098901099 -0.004395604395604396 --257.0 6404096.53307393 1.0 6404096.53307393 -0.011673151750972763 -0.004669260700389105 --250.0 6583411.236 1.0 6583411.236 -0.012 -0.0048 --247.0 NULL 1.0 NULL -0.012145748987854251 -0.004858299595141701 --247.0 -7546669.174089069 1.0 -7546669.174089069 -0.012145748987854251 -0.004858299595141701 --246.0 NULL 1.0 NULL -0.012195121951219513 -0.004878048780487805 --237.0 NULL 1.0 NULL -0.012658227848101266 -0.005063291139240506 --236.0 NULL 1.0 NULL -0.012711864406779662 -0.005084745762711864 --229.0 7187130.170305677 1.0 7187130.170305677 
-0.013100436681222707 -0.005240174672489083 --228.0 8278779.631578947 1.0 8278779.631578947 -0.013157894736842105 -0.005263157894736842 --225.0 NULL 1.0 NULL -0.013333333333333334 -0.005333333333333333 --210.0 -8876320.40952381 1.0 -8876320.40952381 -0.014285714285714285 -0.005714285714285714 --201.0 NULL 1.0 NULL -0.014925373134328358 -0.005970149253731343 --199.0 NULL 1.0 NULL -0.01507537688442211 -0.006030150753768844 --189.0 NULL 1.0 NULL -0.015873015873015872 -0.006349206349206349 --188.0 NULL 1.0 NULL -0.015957446808510637 -0.006382978723404255 --184.0 8944852.222826088 1.0 8944852.222826088 -0.016304347826086956 -0.006521739130434782 --183.0 8993731.196721312 1.0 8993731.196721312 -0.01639344262295082 -0.006557377049180328 --181.0 NULL 1.0 NULL -0.016574585635359115 -0.0066298342541436465 --179.0 NULL 1.0 NULL -0.01675977653631285 -0.0067039106145251395 --169.0 9738774.01775148 1.0 9738774.01775148 -0.01775147928994083 -0.007100591715976331 --164.0 NULL 1.0 NULL -0.018292682926829267 -0.007317073170731707 --161.0 NULL 1.0 NULL -0.018633540372670808 -0.007453416149068323 --154.0 1.2256894519480519E7 1.0 1.2256894519480519E7 -0.01948051948051948 -0.007792207792207792 --152.0 NULL 1.0 NULL -0.019736842105263157 -0.007894736842105263 --148.0 NULL 1.0 NULL -0.02027027027027027 -0.008108108108108109 --140.0 NULL 1.0 NULL -0.02142857142857143 -0.008571428571428572 --138.0 NULL 1.0 NULL -0.021739130434782608 -0.008695652173913044 --137.0 NULL 1.0 NULL -0.021897810218978103 -0.00875912408759124 --132.0 NULL 1.0 NULL -0.022727272727272728 -0.00909090909090909 --129.0 1.2758548906976745E7 1.0 1.2758548906976745E7 -0.023255813953488372 -0.009302325581395349 --128.0 NULL 1.0 NULL -0.0234375 -0.009375 --126.0 NULL 1.0 NULL -0.023809523809523808 -0.009523809523809523 --126.0 -1.4793867349206349E7 1.0 -1.4793867349206349E7 -0.023809523809523808 -0.009523809523809523 --116.0 NULL 1.0 NULL -0.02586206896551724 -0.010344827586206896 --113.0 NULL 1.0 NULL -0.02654867256637168 -0.010619469026548672 -113.0 -1.6495816690265486E7 1.0 -1.6495816690265486E7 -0.02654867256637168 -0.010619469026548672 --96.0 NULL 1.0 NULL -0.03125 -0.012499999999999999 --94.0 -1.9830077510638297E7 1.0 -1.9830077510638297E7 -0.031914893617021274 -0.01276595744680851 --93.0 NULL 1.0 NULL -0.03225806451612903 -0.012903225806451613 --77.0 2.4513789038961038E7 1.0 2.4513789038961038E7 -0.03896103896103896 -0.015584415584415584 --69.0 2.735596747826087E7 1.0 2.735596747826087E7 -0.043478260869565216 -0.017391304347826087 --62.0 NULL 1.0 NULL -0.04838709677419355 -0.01935483870967742 --62.0 3.0444544451612905E7 1.0 3.0444544451612905E7 -0.04838709677419355 -0.01935483870967742 --60.0 NULL 1.0 NULL -0.05 -0.02 --57.0 -3.27022330877193E7 1.0 -3.27022330877193E7 -0.05263157894736842 -0.021052631578947368 --49.0 3.35888328367347E7 1.0 3.35888328367347E7 -0.061224489795918366 -0.024489795918367346 --46.0 3.577940889130435E7 1.0 3.577940889130435E7 -0.06521739130434782 -0.02608695652173913 --38.0 4.3311916026315786E7 1.0 4.3311916026315786E7 -0.07894736842105263 -0.031578947368421054 +-113.0 NULL 1.0 NULL -0.02654867256637168 -0.010619469026548672 +-116.0 NULL 1.0 NULL -0.02586206896551724 -0.010344827586206896 +-12.0 -1.5533560716666666E8 1.0 -1.5533560716666666E8 -0.25 -0.09999999999999999 +-126.0 -1.4793867349206349E7 1.0 -1.4793867349206349E7 -0.023809523809523808 -0.009523809523809523 +-126.0 NULL 1.0 NULL -0.023809523809523808 -0.009523809523809523 +-128.0 NULL 1.0 NULL -0.0234375 -0.009375 +-129.0 1.2758548906976745E7 1.0 
1.2758548906976745E7 -0.023255813953488372 -0.009302325581395349 +-132.0 NULL 1.0 NULL -0.022727272727272728 -0.00909090909090909 +-137.0 NULL 1.0 NULL -0.021897810218978103 -0.00875912408759124 +-138.0 NULL 1.0 NULL -0.021739130434782608 -0.008695652173913044 +-140.0 NULL 1.0 NULL -0.02142857142857143 -0.008571428571428572 +-148.0 NULL 1.0 NULL -0.02027027027027027 -0.008108108108108109 +-152.0 NULL 1.0 NULL -0.019736842105263157 -0.007894736842105263 +-154.0 1.2256894519480519E7 1.0 1.2256894519480519E7 -0.01948051948051948 -0.007792207792207792 +-161.0 NULL 1.0 NULL -0.018633540372670808 -0.007453416149068323 +-164.0 NULL 1.0 NULL -0.018292682926829267 -0.007317073170731707 +-169.0 9738774.01775148 1.0 9738774.01775148 -0.01775147928994083 -0.007100591715976331 +-17.0 NULL 1.0 NULL -0.17647058823529413 -0.07058823529411765 +-179.0 NULL 1.0 NULL -0.01675977653631285 -0.0067039106145251395 +-181.0 NULL 1.0 NULL -0.016574585635359115 -0.0066298342541436465 +-183.0 8993731.196721312 1.0 8993731.196721312 -0.01639344262295082 -0.006557377049180328 +-184.0 8944852.222826088 1.0 8944852.222826088 -0.016304347826086956 -0.006521739130434782 +-188.0 NULL 1.0 NULL -0.015957446808510637 -0.006382978723404255 +-189.0 NULL 1.0 NULL -0.015873015873015872 -0.006349206349206349 +-199.0 NULL 1.0 NULL -0.01507537688442211 -0.006030150753768844 +-20.0 NULL 1.0 NULL -0.15 -0.06 +-201.0 NULL 1.0 NULL -0.014925373134328358 -0.005970149253731343 +-21.0 8.988389314285715E7 1.0 8.988389314285715E7 -0.14285714285714285 -0.05714285714285714 +-210.0 -8876320.40952381 1.0 -8876320.40952381 -0.014285714285714285 -0.005714285714285714 +-225.0 NULL 1.0 NULL -0.013333333333333334 -0.005333333333333333 +-228.0 8278779.631578947 1.0 8278779.631578947 -0.013157894736842105 -0.005263157894736842 +-229.0 7187130.170305677 1.0 7187130.170305677 -0.013100436681222707 -0.005240174672489083 +-236.0 NULL 1.0 NULL -0.012711864406779662 -0.005084745762711864 +-237.0 NULL 1.0 NULL -0.012658227848101266 -0.005063291139240506 +-246.0 NULL 1.0 NULL -0.012195121951219513 -0.004878048780487805 +-247.0 -7546669.174089069 1.0 -7546669.174089069 -0.012145748987854251 -0.004858299595141701 +-247.0 NULL 1.0 NULL -0.012145748987854251 -0.004858299595141701 +-250.0 6583411.236 1.0 6583411.236 -0.012 -0.0048 +-257.0 6404096.53307393 1.0 6404096.53307393 -0.011673151750972763 -0.004669260700389105 +-273.0 6028764.868131869 1.0 6028764.868131869 -0.01098901098901099 -0.004395604395604396 +-274.0 6888911.518248175 1.0 6888911.518248175 -0.010948905109489052 -0.00437956204379562 +-279.0 NULL 1.0 NULL -0.010752688172043012 -0.004301075268817204 -28.0 5.878045746428572E7 1.0 5.878045746428572E7 -0.10714285714285714 -0.04285714285714286 -28.0 6.741291985714285E7 1.0 6.741291985714285E7 -0.10714285714285714 -0.04285714285714286 --21.0 8.988389314285715E7 1.0 8.988389314285715E7 -0.14285714285714285 -0.05714285714285714 --20.0 NULL 1.0 NULL -0.15 -0.06 --17.0 NULL 1.0 NULL -0.17647058823529413 -0.07058823529411765 --12.0 -1.5533560716666666E8 1.0 -1.5533560716666666E8 -0.25 -0.09999999999999999 +-281.0 NULL 1.0 NULL -0.010676156583629894 -0.004270462633451957 +-289.0 NULL 1.0 NULL -0.010380622837370242 -0.004152249134948096 +-290.0 NULL 1.0 NULL -0.010344827586206896 -0.004137931034482759 +-292.0 NULL 1.0 NULL -0.010273972602739725 -0.00410958904109589 -3.0 NULL 1.0 NULL -1.0 -0.39999999999999997 +-38.0 4.3311916026315786E7 1.0 4.3311916026315786E7 -0.07894736842105263 -0.031578947368421054 +-46.0 3.577940889130435E7 1.0 3.577940889130435E7 
-0.06521739130434782 -0.02608695652173913 +-49.0 3.35888328367347E7 1.0 3.35888328367347E7 -0.061224489795918366 -0.024489795918367346 +-57.0 -3.27022330877193E7 1.0 -3.27022330877193E7 -0.05263157894736842 -0.021052631578947368 +-60.0 NULL 1.0 NULL -0.05 -0.02 +-62.0 3.0444544451612905E7 1.0 3.0444544451612905E7 -0.04838709677419355 -0.01935483870967742 +-62.0 NULL 1.0 NULL -0.04838709677419355 -0.01935483870967742 +-69.0 2.735596747826087E7 1.0 2.735596747826087E7 -0.043478260869565216 -0.017391304347826087 +-77.0 2.4513789038961038E7 1.0 2.4513789038961038E7 -0.03896103896103896 -0.015584415584415584 +-93.0 NULL 1.0 NULL -0.03225806451612903 -0.012903225806451613 +-94.0 -1.9830077510638297E7 1.0 -1.9830077510638297E7 -0.031914893617021274 -0.01276595744680851 +-96.0 NULL 1.0 NULL -0.03125 -0.012499999999999999 0.0 NULL NULL NULL NULL NULL 0.0 NULL NULL NULL NULL NULL 0.0 NULL NULL NULL NULL NULL @@ -725,103 +725,103 @@ from alltypesorc where cint > 500000000 or cdouble > 1000000000 or ctinyint = 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### -528534767 NULL -50 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 33 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 -528534767 NULL NULL NULL NULL NULL NULL NULL NULL +528534767 NULL -1 NULL NULL 1.0 NULL NULL 0 528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 16 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 62 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -51 NULL NULL 1.0 NULL NULL 0 528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -62 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 39 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -32 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -7 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 528534767 NULL -11 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -24 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -44 NULL NULL 1.0 NULL NULL 0 -NULL 1018195815 0 NULL NULL NULL NULL NULL NULL -528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 4 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -57 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -12 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -13 NULL NULL 1.0 NULL NULL 0 528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 46 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 528534767 NULL -16 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -54 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 528534767 NULL -19 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 5 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 51 
NULL NULL 1.0 NULL NULL 0 -528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 19 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -23 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -24 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -32 NULL NULL 1.0 NULL NULL 0 528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 18 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -34 NULL NULL 1.0 NULL NULL 0 528534767 NULL -36 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -37 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 528534767 NULL -40 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 21 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -59 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 0 NULL NULL NULL NULL NULL NULL -528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -33 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -43 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -44 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -47 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -53 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 27 NULL NULL 1.0 NULL NULL 0 -528534767 NULL NULL NULL NULL NULL NULL NULL NULL -528534767 NULL -21 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 43 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 41 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -28 NULL NULL 1.0 NULL NULL 0 528534767 NULL -5 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 13 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -45 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -50 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -54 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -55 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -56 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -57 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -59 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -62 NULL NULL 1.0 NULL NULL 0 +528534767 NULL -7 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 0 NULL NULL NULL NULL NULL NULL +528534767 NULL 0 NULL NULL NULL NULL NULL NULL 528534767 NULL 10 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -48 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 13 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 16 NULL NULL 1.0 NULL NULL 0 
+528534767 NULL 18 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 19 NULL NULL 1.0 NULL NULL 0 528534767 NULL 2 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -37 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -43 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -1 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -12 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 0 NULL NULL NULL NULL NULL NULL +528534767 NULL 21 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 24 NULL NULL 1.0 NULL NULL 0 528534767 NULL 26 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -22 NULL NULL 1.0 NULL NULL 0 -528534767 NULL 9 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -13 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 27 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 28 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 29 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 30 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 31 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 33 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 34 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 36 NULL NULL 1.0 NULL NULL 0 528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -4 NULL NULL 1.0 NULL NULL 0 -528534767 NULL -47 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 38 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 39 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 4 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 40 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 41 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 43 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 46 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 5 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 51 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 53 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 61 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 62 NULL NULL 1.0 NULL NULL 0 +528534767 NULL 9 NULL NULL 1.0 NULL NULL 0 +528534767 NULL NULL NULL NULL NULL NULL NULL NULL +528534767 NULL NULL NULL NULL NULL NULL NULL NULL +NULL 1018195815 0 NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out index c65ef71..7967df1 100644 --- ql/src/test/results/clientpositive/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -47,6 +47,17 @@ POSTHOOK: Output: default@TSINT POSTHOOK: Lineage: tsint.csint SIMPLE [(tsint_txt)tsint_txt.FieldSchema(name:csint, type:smallint, comment:null), ] POSTHOOK: Lineage: tsint.rnum SIMPLE [(tsint_txt)tsint_txt.FieldSchema(name:rnum, type:int, comment:null), ] tsint_txt.rnum tsint_txt.csint +PREHOOK: query: insert into TSINT values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tsint +POSTHOOK: query: insert into TSINT values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tsint +POSTHOOK: Lineage: tsint.csint EXPRESSION [] +POSTHOOK: Lineage: tsint.rnum EXPRESSION [] +_col0 _col1 PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: 
Input: default@tint_txt @@ -60,6 +71,17 @@ POSTHOOK: Output: default@TINT POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] tint_txt.rnum tint_txt.cint +PREHOOK: query: insert into TINT values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tint +POSTHOOK: query: insert into TINT values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tint +POSTHOOK: Lineage: tint.cint EXPRESSION [] +POSTHOOK: Lineage: tint.rnum EXPRESSION [] +_col0 _col1 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint @@ -88,11 +110,11 @@ STAGE PLANS: $hdt$_0:tint TableScan alias: tint - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: rnum (type: int), cint (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 @@ -103,7 +125,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tsint - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -113,7 +135,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -127,7 +149,7 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -136,13 +158,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 2, 1, 3, 5] selectExpressions: IfExprStringScalarStringScalar(col 4:boolean, val Ok, val NoOk)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3:smallint, col 3:smallint) -> 4:boolean) -> 5:string - Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false 
File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -183,26 +205,37 @@ tint.rnum tsint.rnum tint.cint tsint.csint between_col 0 2 NULL 0 NoOk 0 3 NULL 1 NoOk 0 4 NULL 10 NoOk +0 NULL NULL NULL NoOk 1 0 -1 NULL NoOk 1 1 -1 -1 Ok 1 2 -1 0 NoOk 1 3 -1 1 NoOk 1 4 -1 10 NoOk +1 NULL -1 NULL NoOk 2 0 0 NULL NoOk 2 1 0 -1 NoOk 2 2 0 0 Ok 2 3 0 1 NoOk 2 4 0 10 NoOk +2 NULL 0 NULL NoOk 3 0 1 NULL NoOk 3 1 1 -1 NoOk 3 2 1 0 NoOk 3 3 1 1 Ok 3 4 1 10 NoOk +3 NULL 1 NULL NoOk 4 0 10 NULL NoOk 4 1 10 -1 NoOk 4 2 10 0 NoOk 4 3 10 1 NoOk 4 4 10 10 Ok +4 NULL 10 NULL NoOk +NULL 0 NULL NULL NoOk +NULL 1 NULL -1 NoOk +NULL 2 NULL 0 NoOk +NULL 3 NULL 1 NoOk +NULL 4 NULL 10 NoOk +NULL NULL NULL NULL NoOk Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint @@ -231,11 +264,11 @@ STAGE PLANS: $hdt$_0:tint TableScan alias: tint - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: rnum (type: int), cint (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 @@ -246,7 +279,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tsint - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -256,7 +289,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -270,14 +303,14 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 385 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: SelectColumnIsTrue(col 4:boolean)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3:smallint, col 3:smallint) -> 4:boolean) predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean) - Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 
_col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3 @@ -285,13 +318,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 1, 3] - Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/vector_bround.q.out ql/src/test/results/clientpositive/vector_bround.q.out index a2b745a..57fb16f 100644 --- ql/src/test/results/clientpositive/vector_bround.q.out +++ ql/src/test/results/clientpositive/vector_bround.q.out @@ -34,6 +34,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_vector_bround POSTHOOK: Lineage: test_vector_bround.v0 SCRIPT [] POSTHOOK: Lineage: test_vector_bround.v1 SCRIPT [] +PREHOOK: query: insert into test_vector_bround values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_vector_bround +POSTHOOK: query: insert into test_vector_bround values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_vector_bround +POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION [] +POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION [] PREHOOK: query: explain vectorization detail select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY @@ -54,7 +64,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_vector_bround - Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 128 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:v0:double, 1:v1:double, 2:ROW__ID:struct] @@ -66,13 +76,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 4] selectExpressions: FuncBRoundDoubleToDouble(col 0:double) -> 3:double, BRoundWithNumDigitsDoubleToDouble(col 1, decimalPlaces 1) -> 4:double - Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 128 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 128 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -116,3 +126,4 @@ POSTHOOK: Input: default@test_vector_bround 3.0 1.3 3.0 1.3 4.0 1.4 +NULL NULL diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out index ea0ddff..b38cbe7 100644 --- ql/src/test/results/clientpositive/vector_char_2.q.out +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -26,6 +26,16 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@char_2 POSTHOOK: Lineage: char_2.key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into char_2 values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert into char_2 values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [] +POSTHOOK: Lineage: char_2.value EXPRESSION [] PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows from src group by value @@ -74,7 +84,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 501 Data size: 99168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -85,7 +95,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3] selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 501 Data size: 99168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() Group By Vectorization: @@ -99,7 +109,7 @@ STAGE PLANS: keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 501 Data size: 99168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: + @@ -109,7 +119,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 501 Data size: 99168 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized @@ -132,7 +142,7 @@ STAGE PLANS: keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 49485 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -154,7 +164,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 49485 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized @@ -175,13 +185,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 49500 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 49485 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 985 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 985 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -209,11 +219,11 @@ limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_2 #### A masked pattern was here #### +NULL NULL 1 val_0 0 3 val_10 10 1 val_100 200 2 val_103 206 2 -val_104 208 2 PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows from src group by value @@ -262,7 +272,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 501 Data size: 99168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -273,7 +283,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3] selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 501 Data size: 99168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() Group By Vectorization: @@ -287,7 +297,7 @@ STAGE PLANS: keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 501 Data size: 99168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: - @@ -297,7 +307,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 501 Data size: 99168 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized @@ -320,7 +330,7 @@ STAGE PLANS: keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 49485 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -342,7 +352,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 49485 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: 
vectorized @@ -363,13 +373,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 49485 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 985 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 985 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out index a5a7915..ca06456 100644 --- ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -16,6 +16,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@str_str_orc POSTHOOK: Lineage: str_str_orc.str1 SCRIPT [] POSTHOOK: Lineage: str_str_orc.str2 SCRIPT [] +PREHOOK: query: insert into str_str_orc values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@str_str_orc +POSTHOOK: query: insert into str_str_orc values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@str_str_orc +POSTHOOK: Lineage: str_str_orc.str1 EXPRESSION [] +POSTHOOK: Lineage: str_str_orc.str2 EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result @@ -29,8 +39,8 @@ from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -42,37 +52,71 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 5] + selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 1:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 271 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 271 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 271 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -98,6 +142,7 @@ GROUP BY str2 POSTHOOK: type: QUERY POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### +NULL 0.0 X 0.02 y 0.0 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION @@ -109,8 +154,8 @@ SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -122,18 +167,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: COALESCE(str1,0) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4] + selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string + Statistics: Num rows: 5 Data size: 678 
Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -155,6 +220,7 @@ POSTHOOK: Input: default@str_str_orc 0 1 0 +0 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result @@ -181,7 +247,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -192,7 +258,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 5] selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: @@ -206,7 +272,7 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -216,7 +282,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -238,14 +304,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 271 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 271 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 271 Basic stats: COMPLETE 
Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -271,6 +337,7 @@ GROUP BY str2 POSTHOOK: type: QUERY POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### +NULL 0.0 X 0.02 y 0.0 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION @@ -295,7 +362,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -306,13 +373,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 678 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -348,3 +415,4 @@ POSTHOOK: Input: default@str_str_orc 0 1 0 +0 diff --git ql/src/test/results/clientpositive/vector_coalesce_3.q.out ql/src/test/results/clientpositive/vector_coalesce_3.q.out index 0e602f0..111e0a5 100644 --- ql/src/test/results/clientpositive/vector_coalesce_3.q.out +++ ql/src/test/results/clientpositive/vector_coalesce_3.q.out @@ -33,14 +33,37 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN +PREHOOK: query: insert into test_1 values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_1 +POSTHOOK: query: insert into test_1 values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_1 +POSTHOOK: Lineage: test_1.attr EXPRESSION [] +POSTHOOK: Lineage: test_1.member EXPRESSION [] +PREHOOK: query: insert into test_2 values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_2 +POSTHOOK: query: insert into test_2 values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_2 +POSTHOOK: Lineage: test_2.member EXPRESSION [] +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -57,11 +80,11 @@ STAGE PLANS: $hdt$_1:n TableScan alias: n - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 32 
Basic stats: COMPLETE Column stats: NONE Select Operator expressions: member (type: bigint), attr (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: bigint) @@ -72,31 +95,68 @@ STAGE PLANS: Map Operator Tree: TableScan alias: m - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:member:bigint, 1:ROW__ID:struct] Select Operator expressions: member (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 5 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col2 - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3] + selectExpressions: IfExprColumnNull(col 2:boolean, col 1:bigint, null)(children: LongColGreaterLongScalar(col 3:bigint, val 1)(children: VectorCoalesce(columns [1, 2])(children: col 1:bigint, ConstantVectorExpression(val 5) -> 2:bigint) -> 3:bigint) -> 2:boolean, col 1:bigint) -> 3:bigint + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: member:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Local Work: Map Reduce 
Local Work @@ -122,3 +182,4 @@ POSTHOOK: Input: default@test_2 2 2 3 NULL 4 NULL +NULL NULL diff --git ql/src/test/results/clientpositive/vector_data_types.q.out ql/src/test/results/clientpositive/vector_data_types.q.out index 2b6491f..06a1fc4 100644 --- ql/src/test/results/clientpositive/vector_data_types.q.out +++ ql/src/test/results/clientpositive/vector_data_types.q.out @@ -95,6 +95,25 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] +PREHOOK: query: insert into over1korc values (NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@over1korc +POSTHOOK: query: insert into over1korc values (NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL, NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@over1korc +POSTHOOK: Lineage: over1korc.b EXPRESSION [] +POSTHOOK: Lineage: over1korc.bin EXPRESSION [] +POSTHOOK: Lineage: over1korc.bo EXPRESSION [] +POSTHOOK: Lineage: over1korc.d EXPRESSION [] +POSTHOOK: Lineage: over1korc.dec EXPRESSION [] +POSTHOOK: Lineage: over1korc.f EXPRESSION [] +POSTHOOK: Lineage: over1korc.i EXPRESSION [] +POSTHOOK: Lineage: over1korc.s EXPRESSION [] +POSTHOOK: Lineage: over1korc.si EXPRESSION [] +POSTHOOK: Lineage: over1korc.t EXPRESSION [] +POSTHOOK: Lineage: over1korc.ts EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i LIMIT 20 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i LIMIT 20 @@ -113,22 +132,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), 
VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE @@ -154,6 +173,7 @@ POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 374 65560 4294967516 65.43 22.48 true oscar quirinius 2013-03-01 09:11:58.703316 16.86 mathematics NULL 409 65536 4294967490 46.97 25.92 false fred miller 2013-03-01 09:11:58.703116 33.45 history NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703111 18.80 mathematics @@ -173,7 +193,6 @@ NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703 -2 461 65648 4294967425 58.52 24.85 false rachel thompson 2013-03-01 09:11:58.703318 85.62 zync studies -1 268 65778 4294967418 56.33 44.73 true calvin falkner 2013-03-01 09:11:58.70322 7.37 history -1 281 65643 4294967323 15.1 45.0 false irene nixon 2013-03-01 09:11:58.703223 80.96 undecided --1 300 65663 4294967343 71.26 34.62 true calvin ovid 2013-03-01 09:11:58.703262 78.56 study skills PREHOOK: query: SELECT SUM(HASH(*)) FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q PREHOOK: type: QUERY @@ -203,7 +222,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -213,7 +232,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ @@ -222,7 +241,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized @@ -243,7 +262,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: 
string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE @@ -269,6 +288,7 @@ POSTHOOK: query: SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 374 65560 4294967516 65.43 22.48 true oscar quirinius 2013-03-01 09:11:58.703316 16.86 mathematics NULL 409 65536 4294967490 46.97 25.92 false fred miller 2013-03-01 09:11:58.703116 33.45 history NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703111 18.80 mathematics @@ -288,7 +308,95 @@ NULL 473 65720 4294967324 80.74 40.6 false holly falkner 2013-03-01 09:11:58.703 -2 461 65648 4294967425 58.52 24.85 false rachel thompson 2013-03-01 09:11:58.703318 85.62 zync studies -1 268 65778 4294967418 56.33 44.73 true calvin falkner 2013-03-01 09:11:58.70322 7.37 history -1 281 65643 4294967323 15.1 45.0 false irene nixon 2013-03-01 09:11:58.703223 80.96 undecided --1 300 65663 4294967343 71.26 34.62 true calvin ovid 2013-03-01 09:11:58.703262 78.56 study skills +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT SUM(HASH(*)) +FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT SUM(HASH(*)) +FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: hash(t,si,i,b,f,d,bo,s,ts,dec,bin) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [12] + selectExpressions: VectorUDFAdaptor(hash(t,si,i,b,f,d,bo,s,ts,dec,bin)) -> 12:int + Statistics: Num rows: 1050 Data size: 311254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 12:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(HASH(*)) FROM (SELECT t, si, i, b, f, d, bo, s, ts, `dec`, bin FROM over1korc ORDER BY t, si, i) as q PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_date_1.q.out ql/src/test/results/clientpositive/vector_date_1.q.out index 3f2b212..93f9a71 100644 --- ql/src/test/results/clientpositive/vector_date_1.q.out +++ ql/src/test/results/clientpositive/vector_date_1.q.out @@ -22,6 +22,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 EXPRESSION [] POSTHOOK: Lineage: vector_date_1.dt2 EXPRESSION [] +_col0 _col1 PREHOOK: query: insert into table vector_date_1 select date '1999-12-31', date '2000-01-01' from src limit 1 PREHOOK: type: QUERY @@ -34,6 +35,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] +_c0 _c1 PREHOOK: query: insert into table vector_date_1 select date '2001-01-01', date '2001-06-01' from src limit 1 PREHOOK: type: QUERY @@ -46,7 +48,20 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] -PREHOOK: query: explain +_c0 _c1 +PREHOOK: query: select * from vector_date_1 order by dt1, dt2 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_date_1 order by dt1, dt2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +vector_date_1.dt1 vector_date_1.dt2 +NULL NULL +1999-12-31 2000-01-01 +2001-01-01 2001-06-01 +PREHOOK: query: explain vectorization detail select dt1, dt2, -- should be all true @@ -60,7 +75,7 @@ select dt2 > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all true @@ -74,6 +89,11 @@ select dt2 > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -85,16 +105,48 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: 
Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 = dt1) (type: boolean), (dt1 <> dt2) (type: boolean), (dt1 <= dt1) (type: boolean), (dt1 <= dt2) (type: boolean), (dt1 < dt2) (type: boolean), (dt2 >= dt2) (type: boolean), (dt2 >= dt1) (type: boolean), (dt2 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: LongColEqualLongColumn(col 0:date, col 0:date) -> 3:boolean, LongColNotEqualLongColumn(col 0:date, col 1:date) -> 4:boolean, LongColLessEqualLongColumn(col 0:date, col 0:date) -> 5:boolean, LongColLessEqualLongColumn(col 0:date, col 1:date) -> 6:boolean, LongColLessLongColumn(col 0:date, col 1:date) -> 7:boolean, LongColGreaterEqualLongColumn(col 1:date, col 1:date) -> 8:boolean, LongColGreaterEqualLongColumn(col 1:date, col 0:date) -> 9:boolean, LongColGreaterLongColumn(col 1:date, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) @@ -144,10 +196,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 true true true true true true true true 2001-01-01 2001-06-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain 
vectorization detail select dt1, dt2, -- should be all false @@ -161,7 +214,7 @@ select dt2 < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -175,6 +228,11 @@ select dt2 < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -186,16 +244,48 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 <> dt1) (type: boolean), (dt1 = dt2) (type: boolean), (dt1 < dt1) (type: boolean), (dt1 >= dt2) (type: boolean), (dt1 > dt2) (type: boolean), (dt2 > dt2) (type: boolean), (dt2 <= dt1) (type: boolean), (dt2 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: LongColNotEqualLongColumn(col 0:date, col 0:date) -> 3:boolean, LongColEqualLongColumn(col 0:date, col 1:date) -> 4:boolean, LongColLessLongColumn(col 0:date, col 0:date) -> 5:boolean, LongColGreaterEqualLongColumn(col 0:date, col 1:date) -> 6:boolean, LongColGreaterLongColumn(col 0:date, col 1:date) -> 7:boolean, LongColGreaterLongColumn(col 1:date, col 1:date) -> 8:boolean, LongColLessEqualLongColumn(col 1:date, col 0:date) -> 9:boolean, LongColLessLongColumn(col 1:date, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), 
VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) @@ -245,10 +335,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 false false false false false false false false 2001-01-01 2001-06-01 false false false false false false false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should be all true @@ -262,7 +353,7 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should be all true @@ -276,6 +367,11 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -287,16 +383,48 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), (dt1 <> 1970-01-01) (type: boolean), (dt1 >= 1970-01-01) (type: boolean), (dt1 > 1970-01-01) (type: boolean), (dt1 <= 2100-01-01) (type: boolean), (dt1 < 2100-01-01) (type: boolean), (1970-01-01 <> dt1) (type: boolean), (1970-01-01 <= dt1) (type: boolean), (1970-01-01 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: DateColNotEqualDateScalar(col 0:date, date 1970-01-01) -> 3:boolean, DateColGreaterEqualDateScalar(col 0:date, date 1970-01-01) -> 4:boolean, DateColGreaterDateScalar(col 0:date, date 1970-01-01) -> 5:boolean, DateColLessEqualDateScalar(col 0:date, date 2100-01-01) -> 6:boolean, DateColLessDateScalar(col 0:date, date 2100-01-01) -> 7:boolean, DateScalarNotEqualDateColumn(date 1970-01-01, col 0:date) -> 8:boolean, DateScalarLessEqualDateColumn(date 1970-01-01, col 0:date) -> 9:boolean, DateScalarLessDateColumn(date 1970-01-01, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) @@ -346,10 +474,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 true true true true true true true true 2001-01-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -363,7 +492,7 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -377,6 +506,11 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -388,16 +522,48 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), (dt1 = 1970-01-01) (type: boolean), (dt1 <= 1970-01-01) (type: boolean), (dt1 < 1970-01-01) (type: boolean), (dt1 >= 2100-01-01) (type: boolean), (dt1 > 2100-01-01) (type: boolean), (1970-01-01 = dt1) (type: boolean), (1970-01-01 >= dt1) (type: boolean), (1970-01-01 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: DateColEqualDateScalar(col 0:date, date 1970-01-01) -> 3:boolean, DateColLessEqualDateScalar(col 0:date, date 1970-01-01) -> 4:boolean, DateColLessDateScalar(col 0:date, date 1970-01-01) -> 5:boolean, DateColGreaterEqualDateScalar(col 0:date, date 2100-01-01) -> 6:boolean, DateColGreaterDateScalar(col 0:date, date 2100-01-01) -> 7:boolean, DateScalarEqualDateColumn(date 1970-01-01, col 0:date) -> 8:boolean, DateScalarGreaterEqualDateColumn(date 1970-01-01, col 0:date) -> 9:boolean, DateScalarGreaterDateColumn(date 1970-01-01, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) @@ -447,10 +613,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 false false false false false false false false 2001-01-01 false false false false false false false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -463,7 +630,7 @@ where and dt2 >= dt1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -476,6 +643,11 @@ where and dt2 >= dt1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -487,19 +659,54 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0:date, col 0:date), FilterLongColNotEqualLongColumn(col 0:date, col 1:date), FilterLongColLessLongColumn(col 0:date, col 1:date), FilterLongColLessEqualLongColumn(col 0:date, col 1:date), FilterLongColGreaterLongColumn(col 1:date, col 0:date), FilterLongColGreaterEqualLongColumn(col 1:date, col 0:date)) predicate: ((dt1 < dt2) and (dt1 <= dt2) and (dt1 <> dt2) and (dt1 = dt1) and (dt2 > dt1) and (dt2 >= dt1)) (type: boolean) Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dt1 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date) @@ -547,9 +754,10 @@ order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 1999-12-31 2000-01-01 2001-01-01 2001-06-01 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -564,7 +772,7 @@ where and date '1970-01-01' <= dt1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -579,6 +787,11 @@ where and date '1970-01-01' <= dt1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -590,21 +803,51 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDateScalarEqualDateColumn(val 11323, col 0:date), FilterDateColNotEqualDateScalar(col 0:date, val 0), FilterDateScalarNotEqualDateColumn(val 0, col 0:date)) predicate: ((1970-01-01 <> dt1) and (2001-01-01 = dt1) and (dt1 <> 1970-01-01)) (type: boolean) Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 2001-01-01 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 1] + selectExpressions: ConstantVectorExpression(val 11323) -> 3:date Statistics: Num rows: 1 Data size: 74 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 74 Basic 
stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Stage: Stage-0 Fetch Operator @@ -644,13 +887,15 @@ order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 2001-01-01 2001-06-01 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01') PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01') POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -668,6 +913,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -703,6 +949,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -718,6 +970,7 @@ POSTHOOK: query: SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 2001-01-01 PREHOOK: query: drop table vector_date_1 PREHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/vector_decimal_1.q.out ql/src/test/results/clientpositive/vector_decimal_1.q.out index d922bfb..ffd32f6 100644 --- ql/src/test/results/clientpositive/vector_decimal_1.q.out +++ ql/src/test/results/clientpositive/vector_decimal_1.q.out @@ -32,6 +32,17 @@ POSTHOOK: Output: default@decimal_1 POSTHOOK: Lineage: decimal_1.t EXPRESSION [] POSTHOOK: Lineage: decimal_1.u EXPRESSION [] POSTHOOK: Lineage: decimal_1.v EXPRESSION [] +PREHOOK: query: insert into decimal_1 values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: insert into decimal_1 values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_1 +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +POSTHOOK: Lineage: decimal_1.u EXPRESSION [] +POSTHOOK: Lineage: decimal_1.v EXPRESSION [] PREHOOK: query: explain vectorization detail select cast(t as boolean) from decimal_1 order by t PREHOOK: type: QUERY @@ -52,7 +63,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 
Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -64,7 +75,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToBoolean(col 0:decimal(4,2)) -> 4:boolean - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -73,7 +84,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -98,10 +109,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -121,6 +132,7 @@ POSTHOOK: query: select cast(t as boolean) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL true PREHOOK: query: explain vectorization detail select cast(t as tinyint) from decimal_1 order by t @@ -142,7 +154,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -154,7 +166,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:tinyint - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -163,7 +175,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -188,10 +200,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -211,6 +223,7 @@ POSTHOOK: query: select cast(t as tinyint) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17 PREHOOK: query: explain vectorization detail select cast(t as smallint) from decimal_1 order by t @@ -232,7 +245,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -244,7 +257,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:smallint - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + @@ -253,7 +266,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -278,10 +291,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -301,6 +314,7 @@ POSTHOOK: query: select cast(t as smallint) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17 PREHOOK: query: explain vectorization detail select cast(t as int) from decimal_1 order by t @@ -322,7 +336,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -334,7 +348,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:int - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -343,7 +357,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -368,10 +382,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -391,6 +405,7 @@ POSTHOOK: query: select cast(t as int) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17 PREHOOK: query: explain vectorization detail select cast(t as bigint) from decimal_1 order by t @@ -412,7 +427,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -424,7 +439,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:bigint - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -433,7 +448,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -458,10 +473,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -481,6 +496,7 @@ POSTHOOK: query: select cast(t as bigint) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17 PREHOOK: query: explain vectorization detail select cast(t as float) from decimal_1 order by t @@ -502,7 +518,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -514,7 +530,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToDouble(col 0:decimal(4,2)) -> 4:float - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float) sort order: + @@ -523,7 +539,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -548,10 +564,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -571,6 +587,7 @@ POSTHOOK: query: select cast(t as float) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17.29 PREHOOK: query: explain vectorization detail select cast(t as double) from decimal_1 order by t @@ -592,7 +609,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -604,7 +621,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToDouble(col 0:decimal(4,2)) -> 4:double - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -613,7 +630,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys 
IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -638,10 +655,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -661,6 +678,7 @@ POSTHOOK: query: select cast(t as double) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17.29 PREHOOK: query: explain vectorization detail select cast(t as string) from decimal_1 order by t @@ -682,7 +700,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -694,7 +712,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToString(col 0:decimal(4,2)) -> 4:string - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -703,7 +721,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -728,10 +746,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -751,6 +769,7 @@ POSTHOOK: query: select cast(t as string) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 17.29 PREHOOK: query: explain vectorization detail select cast(t as timestamp) from decimal_1 order by t @@ -772,7 +791,7 @@ STAGE PLANS: Map Operator 
Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct] @@ -784,7 +803,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: CastDecimalToTimestamp(col 0:decimal(4,2)) -> 4:timestamp - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + @@ -793,7 +812,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -818,10 +837,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -841,6 +860,7 @@ POSTHOOK: query: select cast(t as timestamp) from decimal_1 order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 #### A masked pattern was here #### +NULL 1969-12-31 16:00:17.29 PREHOOK: query: drop table decimal_1 PREHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index b2fb974..2f7ce31 100644 --- ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -20,6 +20,18 @@ POSTHOOK: Lineage: decimal_vgby.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.F POSTHOOK: Lineage: decimal_vgby.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into decimal_vgby values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_vgby +POSTHOOK: query: insert into decimal_vgby values (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_vgby +POSTHOOK: Lineage: decimal_vgby.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby.cdouble EXPRESSION [] +POSTHOOK: Lineage: 
decimal_vgby.cint EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), @@ -50,7 +62,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct] @@ -61,7 +73,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() Group By Vectorization: @@ -75,7 +87,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -85,7 +97,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -113,17 +125,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1082441 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col9 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 
Data size: 360813 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -191,7 +203,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct] @@ -202,7 +214,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() Group By Vectorization: @@ -216,7 +228,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -226,7 +238,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -254,17 +266,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1082441 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col15 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: 
decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -324,6 +336,18 @@ POSTHOOK: Lineage: decimal_vgby_small.cdecimal1 EXPRESSION [(alltypesorc)alltype POSTHOOK: Lineage: decimal_vgby_small.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby_small.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_vgby_small.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into decimal_vgby_small values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_vgby_small +POSTHOOK: query: insert into decimal_vgby_small values (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_vgby_small +POSTHOOK: Lineage: decimal_vgby_small.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby_small.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby_small.cdouble EXPRESSION [] +POSTHOOK: Lineage: decimal_vgby_small.cint EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), @@ -354,7 +378,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby_small - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5)/DECIMAL_64, 2:cdecimal2:decimal(16,0)/DECIMAL_64, 3:cint:int, 4:ROW__ID:struct] @@ -365,7 +389,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() Group By Vectorization: @@ -379,7 +403,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -389,7 +413,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -417,17 +441,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 6144 Data size: 173230 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 173221 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col9 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,6 +489,25 @@ POSTHOOK: Input: default@decimal_vgby_small 6981 2 -515.62107 -515.62107 -1031.24214 3 6984454 -618 6983218 762 1 1531.21941 1531.21941 1531.21941 2 6984454 1834 6986288 NULL 3072 9318.43514 -4298.15135 5018444.11392 3072 11161 -5148 6010880 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +-18663521580 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), @@ -495,7 +538,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby_small - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE 
TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5)/DECIMAL_64, 2:cdecimal2:decimal(16,0)/DECIMAL_64, 3:cint:int, 4:ROW__ID:struct] @@ -506,7 +549,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3] - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() Group By Vectorization: @@ -520,7 +563,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -530,7 +573,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 12288 Data size: 346461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -558,17 +601,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 6144 Data size: 173230 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 173221 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col15 > 1) (type: boolean) - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE File 
Output Operator compressed: false - Statistics: Num rows: 2048 Data size: 57743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -606,3 +649,22 @@ POSTHOOK: Input: default@decimal_vgby_small 6981 2 -515.62107 -515.62107 -1031.24214 -515.621070000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175 762 1 1531.21941 1531.21941 1531.21941 1531.219410000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881 NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.483083909642 5696.410309489072 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cint, + COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), + COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2) + FROM decimal_vgby_small + GROUP BY cint) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_vgby_small +#### A masked pattern was here #### +91757235680 diff --git ql/src/test/results/clientpositive/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/vector_decimal_expressions.q.out index 434fe22..6bbe813 100644 --- ql/src/test/results/clientpositive/vector_decimal_expressions.q.out +++ ql/src/test/results/clientpositive/vector_decimal_expressions.q.out @@ -1,13 +1,30 @@ -PREHOOK: query: CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@alltypesorc +PREHOOK: query: CREATE TABLE decimal_test (cdouble double,cdecimal1 DECIMAL(20,10), cdecimal2 DECIMAL(23,14)) STORED AS ORC +PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@decimal_test -POSTHOOK: query: CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@alltypesorc +POSTHOOK: query: CREATE TABLE decimal_test (cdouble double,cdecimal1 DECIMAL(20,10), cdecimal2 DECIMAL(23,14)) STORED AS ORC +POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@decimal_test +PREHOOK: query: insert into decimal_test values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_test +POSTHOOK: query: insert into decimal_test values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_test +POSTHOOK: Lineage: 
decimal_test.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdouble EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE decimal_test SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@decimal_test +POSTHOOK: query: INSERT INTO TABLE decimal_test SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@decimal_test POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] @@ -35,7 +52,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_test - Statistics: Num rows: 12288 Data size: 2128368 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2128368 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:ROW__ID:struct] @@ -45,7 +62,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 1:decimal(20,10), val 0), FilterDecimalColLessDecimalScalar(col 1:decimal(20,10), val 12345.5678), FilterDecimalColNotEqualDecimalScalar(col 2:decimal(23,14), val 0), FilterDecimalColGreaterDecimalScalar(col 2:decimal(23,14), val 1000), SelectColumnIsNotNull(col 0:double)) predicate: ((cdecimal1 < 12345.5678) and (cdecimal1 > 0) and (cdecimal2 <> 0) and (cdecimal2 > 1000) and cdouble is not null) (type: boolean) - Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 455 Data size: 78802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdecimal1 + cdecimal2) (type: decimal(25,14)), (cdecimal1 - (2 * cdecimal2)) (type: decimal(26,14)), ((cdecimal1 + 2.34) / cdecimal2) (type: decimal(38,13)), (cdecimal1 * (cdecimal2 / 3.4)) (type: decimal(38,17)), (cdecimal1 % 10) (type: decimal(12,10)), UDFToInteger(cdecimal1) (type: int), UDFToShort(cdecimal2) (type: smallint), UDFToByte(cdecimal2) (type: tinyint), UDFToLong(cdecimal1) (type: bigint), UDFToBoolean(cdecimal1) (type: boolean), UDFToDouble(cdecimal2) (type: double), UDFToFloat(cdecimal1) (type: float), UDFToString(cdecimal2) (type: string), CAST( cdecimal1 AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -54,7 +71,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] selectExpressions: DecimalColAddDecimalColumn(col 1:decimal(20,10), col 2:decimal(23,14)) -> 4:decimal(25,14), DecimalColSubtractDecimalColumn(col 1:decimal(20,10), col 5:decimal(25,14))(children: DecimalScalarMultiplyDecimalColumn(val 2, col 2:decimal(23,14)) -> 5:decimal(25,14)) -> 6:decimal(26,14), DecimalColDivideDecimalColumn(col 7:decimal(21,10), col 
2:decimal(23,14))(children: DecimalColAddDecimalScalar(col 1:decimal(20,10), val 2.34) -> 7:decimal(21,10)) -> 8:decimal(38,13), DecimalColMultiplyDecimalColumn(col 1:decimal(20,10), col 9:decimal(27,17))(children: DecimalColDivideDecimalScalar(col 2:decimal(23,14), val 3.4) -> 9:decimal(27,17)) -> 10:decimal(38,17), DecimalColModuloDecimalScalar(col 1:decimal(20,10), val 10) -> 11:decimal(12,10), CastDecimalToLong(col 1:decimal(20,10)) -> 12:int, CastDecimalToLong(col 2:decimal(23,14)) -> 13:smallint, CastDecimalToLong(col 2:decimal(23,14)) -> 14:tinyint, CastDecimalToLong(col 1:decimal(20,10)) -> 15:bigint, CastDecimalToBoolean(col 1:decimal(20,10)) -> 16:boolean, CastDecimalToDouble(col 2:decimal(23,14)) -> 17:double, CastDecimalToDouble(col 1:decimal(20,10)) -> 18:float, CastDecimalToString(col 2:decimal(23,14)) -> 19:string, CastDecimalToTimestamp(col 1:decimal(20,10)) -> 20:timestamp - Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 455 Data size: 78802 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(25,14)), _col1 (type: decimal(26,14)), _col2 (type: decimal(38,13)), _col3 (type: decimal(38,17)), _col4 (type: decimal(12,10)), _col5 (type: int), _col6 (type: smallint), _col7 (type: tinyint), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: double), _col11 (type: float), _col12 (type: string), _col13 (type: timestamp) sort order: ++++++++++++++ @@ -63,7 +80,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 455 Data size: 78802 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -89,7 +106,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(25,14)), KEY.reducesinkkey1 (type: decimal(26,14)), KEY.reducesinkkey2 (type: decimal(38,13)), KEY.reducesinkkey3 (type: decimal(38,17)), KEY.reducesinkkey4 (type: decimal(12,10)), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: smallint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: bigint), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: string), KEY.reducesinkkey13 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 455 Data size: 78809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 455 Data size: 78802 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: NONE @@ -129,6 +146,19 @@ POSTHOOK: Input: default@decimal_test 1895.51268191268460 -1203.53347193346920 0.8371969190171 262050.87567567649292835 2.4972972973 862 1033 NULL 862 true 1033.0153846153846 862.4973 1033.0153846153846 1969-12-31 16:14:22.497297297 1909.95218295221550 -1212.70166320163100 0.8371797936946 266058.54729730725574014 9.0675675676 869 1040 NULL 869 true 1040.8846153846155 869.06757 1040.8846153846155 1969-12-31 16:14:29.067567567 1913.89022869026920 -1215.20207900203840 
0.8371751679996 267156.82702703945592392 0.8594594595 870 1043 NULL 870 true 1043.0307692307692 870.85944 1043.0307692307692 1969-12-31 16:14:30.859459459 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_test +#### A masked pattern was here #### +-1300490595129 PREHOOK: query: CREATE TABLE decimal_test_small STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(10,3)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(7,2)) AS cdecimal2 FROM alltypesorc PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@alltypesorc @@ -260,3 +290,16 @@ POSTHOOK: Input: default@decimal_test_small 1895.517 -1203.543 0.83719289075 262051.956361764 2.497 862 1033 NULL 862 true 1033.02 862.497 1033.02 1969-12-31 16:14:22.497 1909.948 -1212.692 0.83718392130 266057.499543968 9.068 869 1040 NULL 869 true 1040.88 869.068 1040.88 1969-12-31 16:14:29.068 1913.889 -1215.201 0.83717534491 267156.488691411 0.859 870 1043 NULL 870 true 1043.03 870.859 1043.03 1969-12-31 16:14:30.859 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test_small WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_test_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT cdecimal1 + cdecimal2 as c1, cdecimal1 - (2*cdecimal2) as c2, ((cdecimal1+2.34)/cdecimal2) as c3, (cdecimal1 * (cdecimal2/3.4)) as c4, cdecimal1 % 10 
as c5, CAST(cdecimal1 AS INT) as c6, CAST(cdecimal2 AS SMALLINT) as c7, CAST(cdecimal2 AS TINYINT) as c8, CAST(cdecimal1 AS BIGINT) as c9, CAST (cdecimal1 AS BOOLEAN) as c10, CAST(cdecimal2 AS DOUBLE) as c11, CAST(cdecimal1 AS FLOAT) as c12, CAST(cdecimal2 AS STRING) as c13, CAST(cdecimal1 AS TIMESTAMP) as c14 FROM decimal_test_small WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL +ORDER BY c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_test_small +#### A masked pattern was here #### +774841630076 diff --git ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out index 9f4d478..0ee65eb 100644 --- ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out +++ ql/src/test/results/clientpositive/vector_decimal_math_funcs.q.out @@ -12,6 +12,18 @@ POSTHOOK: Lineage: decimal_test.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSc POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +PREHOOK: query: insert into decimal_test values (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_test +POSTHOOK: query: insert into decimal_test values (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_test +POSTHOOK: Lineage: decimal_test.cbigint EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdecimal1 EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdecimal2 EXPRESSION [] +POSTHOOK: Lineage: decimal_test.cdouble EXPRESSION [] PREHOOK: query: explain vectorization detail select cdecimal1 @@ -100,7 +112,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_test - Statistics: Num rows: 12288 Data size: 2201752 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2201752 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cbigint:bigint, 1:cdouble:double, 2:cdecimal1:decimal(20,10), 3:cdecimal2:decimal(23,14), 4:ROW__ID:struct] @@ -110,7 +122,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 5:bigint, val 0)(children: LongColModuloLongScalar(col 0:bigint, val 500) -> 5:bigint), FilterDoubleColGreaterEqualDoubleScalar(col 7:double, val -1.0)(children: FuncSinDoubleToDouble(col 6:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 6:double) -> 7:double)) predicate: (((cbigint % 500) = 0) and (sin(cdecimal1) >= -1.0)) (type: boolean) - Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 366928 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)), round(cdecimal1, 2) (type: decimal(13,2)), round(cdecimal1) (type: decimal(11,0)), floor(cdecimal1) (type: decimal(11,0)), ceil(cdecimal1) (type: decimal(11,0)), round(exp(cdecimal1), 58) (type: double), ln(cdecimal1) (type: double), log10(cdecimal1) (type: double), log2(cdecimal1) 
(type: double), log2((cdecimal1 - 15601)) (type: double), log(2, cdecimal1) (type: double), power(log2(cdecimal1), 2) (type: double), power(log2(cdecimal1), 2) (type: double), sqrt(cdecimal1) (type: double), abs(cdecimal1) (type: decimal(20,10)), sin(cdecimal1) (type: double), asin(cdecimal1) (type: double), cos(cdecimal1) (type: double), acos(cdecimal1) (type: double), atan(cdecimal1) (type: double), degrees(cdecimal1) (type: double), radians(cdecimal1) (type: double), cdecimal1 (type: decimal(20,10)), (- cdecimal1) (type: decimal(20,10)), sign(cdecimal1) (type: int), cos(((- sin(log(cdecimal1))) + 3.14159)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 @@ -119,13 +131,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 8, 9, 10, 11, 6, 12, 13, 14, 16, 17, 7, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 2, 29, 5, 30] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 2:decimal(20,10), decimalPlaces 2) -> 8:decimal(13,2), FuncRoundDecimalToDecimal(col 2:decimal(20,10)) -> 9:decimal(11,0), FuncFloorDecimalToDecimal(col 2:decimal(20,10)) -> 10:decimal(11,0), FuncCeilDecimalToDecimal(col 2:decimal(20,10)) -> 11:decimal(11,0), RoundWithNumDigitsDoubleToDouble(col 7, decimalPlaces 58)(children: FuncExpDoubleToDouble(col 6:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 6:double) -> 7:double) -> 6:double, FuncLnDoubleToDouble(col 7:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 7:double) -> 12:double, FuncLog10DoubleToDouble(col 7:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 7:double) -> 13:double, FuncLog2DoubleToDouble(col 7:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 7:double) -> 14:double, FuncLog2DoubleToDouble(col 7:double)(children: CastDecimalToDouble(col 15:decimal(21,10))(children: DecimalColSubtractDecimalScalar(col 2:decimal(20,10), val 15601) -> 15:decimal(21,10)) -> 7:double) -> 16:double, FuncLogWithBaseDoubleToDouble(col 7:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 7:double) -> 17:double, FuncPowerDoubleToDouble(col 18:double)(children: FuncLog2DoubleToDouble(col 7:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 7:double) -> 18:double) -> 7:double, FuncPowerDoubleToDouble(col 19:double)(children: FuncLog2DoubleToDouble(col 18:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 18:double) -> 19:double) -> 18:double, FuncSqrtDoubleToDouble(col 19:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 19:double) -> 20:double, FuncAbsDecimalToDecimal(col 2:decimal(20,10)) -> 21:decimal(20,10), FuncSinDoubleToDouble(col 19:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 19:double) -> 22:double, FuncASinDoubleToDouble(col 19:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 19:double) -> 23:double, FuncCosDoubleToDouble(col 19:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 19:double) -> 24:double, FuncACosDoubleToDouble(col 19:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 19:double) -> 25:double, FuncATanDoubleToDouble(col 19:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 19:double) -> 26:double, FuncDegreesDoubleToDouble(col 19:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 19:double) -> 27:double, FuncRadiansDoubleToDouble(col 19:double)(children: CastDecimalToDouble(col 
2:decimal(20,10)) -> 19:double) -> 28:double, FuncNegateDecimalToDecimal(col 2:decimal(20,10)) -> 29:decimal(20,10), FuncSignDecimalToLong(col 2:decimal(20,10)) -> 5:int, FuncCosDoubleToDouble(col 19:double)(children: DoubleColAddDoubleScalar(col 30:double, val 3.14159)(children: DoubleColUnaryMinus(col 19:double)(children: FuncSinDoubleToDouble(col 30:double)(children: FuncLnDoubleToDouble(col 19:double)(children: CastDecimalToDouble(col 2:decimal(20,10)) -> 19:double) -> 30:double) -> 19:double) -> 30:double) -> 19:double) -> 30:double - Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 366928 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 366958 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 366928 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/vector_decimal_udf2.q.out index 541adfb..2bcedd0 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf2.q.out @@ -48,6 +48,16 @@ POSTHOOK: Input: default@decimal_udf2_txt POSTHOOK: Output: default@decimal_udf2 POSTHOOK: Lineage: decimal_udf2.key SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:key, type:decimal(14,5), comment:null), ] POSTHOOK: Lineage: decimal_udf2.value SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:value, type:int, comment:null), ] +PREHOOK: query: insert into DECIMAL_UDF2 values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_udf2 +POSTHOOK: query: insert into DECIMAL_UDF2 values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_udf2 +POSTHOOK: Lineage: decimal_udf2.key EXPRESSION [] +POSTHOOK: Lineage: decimal_udf2.value EXPRESSION [] PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 @@ -70,7 +80,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf2 - Statistics: Num rows: 38 Data size: 4072 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 39 Data size: 4072 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(14,5), 1:value:int, 2:ROW__ID:struct] @@ -80,7 +90,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColEqualDecimalScalar(col 0:decimal(14,5), val 10) predicate: (key = 10) (type: boolean) - Statistics: Num rows: 19 Data size: 2036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 1983 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: null (type: double), null (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -89,13 +99,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9] selectExpressions: 
ConstantVectorExpression(val null) -> 3:double, ConstantVectorExpression(val null) -> 4:double, ConstantVectorExpression(val 1.4711276743037347) -> 5:double, ConstantVectorExpression(val -0.8390715290764524) -> 6:double, ConstantVectorExpression(val -0.5440211108893698) -> 7:double, ConstantVectorExpression(val 0.6483608274590866) -> 8:double, ConstantVectorExpression(val 0.17453292519943295) -> 9:double - Statistics: Num rows: 19 Data size: 2036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 1983 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 19 Data size: 2036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 1983 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -134,6 +144,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### NULL NULL 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf2 +#### A masked pattern was here #### +-3806952922 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), @@ -162,7 +185,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf2 - Statistics: Num rows: 38 Data size: 4072 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 39 Data size: 4072 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(14,5), 1:value:int, 2:ROW__ID:struct] @@ -172,7 +195,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColEqualDecimalScalar(col 0:decimal(14,5), val 10) predicate: (key = 10) (type: boolean) - Statistics: Num rows: 19 Data size: 2036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 1983 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -181,13 +204,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10] selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 2.302585092994046) -> 5:double, ConstantVectorExpression(val 1.0) -> 6:double, FuncLogWithBaseLongToDouble(col 1:double) -> 7:double, VectorUDFAdaptor(log(value, 10)) -> 8:double, ConstantVectorExpression(val 1.0) -> 9:double, ConstantVectorExpression(val 3.1622776601683795) -> 10:double - Statistics: Num rows: 19 Data size: 2036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 1983 Basic stats: 
COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 19 Data size: 2036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 1983 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -232,6 +255,25 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### 22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf2 +#### A masked pattern was here #### +1514360349 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2_txt WHERE key = 10 @@ -318,6 +360,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2_txt #### A masked pattern was here #### NULL NULL 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2_txt) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf2_txt +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) +FROM DECIMAL_UDF2_txt) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf2_txt +#### A masked pattern was here #### +-3806952922 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT exp(key), ln(key), @@ -416,6 +471,25 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2_txt #### A masked pattern was here #### 22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 +PREHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2_txt) q +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf2_txt +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(*)) +FROM (SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF2_txt) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf2_txt +#### A masked pattern was here #### +1514360349 PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF2_txt PREHOOK: type: DROPTABLE PREHOOK: Input: default@decimal_udf2_txt diff --git ql/src/test/results/clientpositive/vector_if_expr_2.q.out ql/src/test/results/clientpositive/vector_if_expr_2.q.out deleted file mode 100644 index fe4f77c..0000000 --- ql/src/test/results/clientpositive/vector_if_expr_2.q.out +++ /dev/null @@ -1,119 +0,0 @@ -PREHOOK: query: drop table if exists foo -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists foo -POSTHOOK: type: DROPTABLE -PREHOOK: query: 
create temporary table foo (x int, y int) stored as orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@foo -POSTHOOK: query: create temporary table foo (x int, y int) stored as orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@foo -PREHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@foo -POSTHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@foo -POSTHOOK: Lineage: foo.x SCRIPT [] -POSTHOOK: Lineage: foo.y SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -select x, IF(x > 0,y,0) from foo order by x -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -select x, IF(x > 0,y,0) from foo order by x -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: foo - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Select Operator - expressions: x (type: int), if((x > 0), y, 0) (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4] - selectExpressions: IfExprLongColumnLongScalar(col 3:boolean, col 1:int, val 0)(children: LongColGreaterLongScalar(col 0:int, val 0) -> 3:boolean) -> 4:int - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: 
select x, IF(x > 0,y,0) from foo order by x -PREHOOK: type: QUERY -PREHOOK: Input: default@foo -#### A masked pattern was here #### -POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x -POSTHOOK: type: QUERY -POSTHOOK: Input: default@foo -#### A masked pattern was here #### -1 1 -2 NULL -3 1 -PREHOOK: query: select x, IF(x > 0,y,0) from foo order by x -PREHOOK: type: QUERY -PREHOOK: Input: default@foo -#### A masked pattern was here #### -POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x -POSTHOOK: type: QUERY -POSTHOOK: Input: default@foo -#### A masked pattern was here #### -1 1 -2 NULL -3 1 diff --git ql/src/test/results/clientpositive/vector_interval_1.q.out ql/src/test/results/clientpositive/vector_interval_1.q.out index 03dad18..1c7df24 100644 --- ql/src/test/results/clientpositive/vector_interval_1.q.out +++ ql/src/test/results/clientpositive/vector_interval_1.q.out @@ -24,6 +24,7 @@ POSTHOOK: Lineage: vector_interval_1.dt SIMPLE [] POSTHOOK: Lineage: vector_interval_1.str1 SIMPLE [] POSTHOOK: Lineage: vector_interval_1.str2 SIMPLE [] POSTHOOK: Lineage: vector_interval_1.ts SIMPLE [] +_c0 _c1 _c2 _c3 PREHOOK: query: insert into vector_interval_1 select null, null, null, null from src limit 1 PREHOOK: type: QUERY @@ -38,6 +39,18 @@ POSTHOOK: Lineage: vector_interval_1.dt EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] +_col0 _col1 _col2 _col3 +PREHOOK: query: select * from vector_interval_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_interval_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +vector_interval_1.ts vector_interval_1.dt vector_interval_1.str1 vector_interval_1.str2 +2001-01-01 01:02:03 2001-01-01 1-2 1 2:3:4 +NULL NULL NULL NULL PREHOOK: query: explain vectorization expression select str1, @@ -52,6 +65,7 @@ select interval '1 2:3:4' day to second, interval_day_time(str2) from vector_interval_1 order by str1 POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -137,6 +151,7 @@ from vector_interval_1 order by str1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +str1 _c1 _c2 _c3 _c4 NULL 1-2 NULL 1 02:03:04.000000000 NULL 1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000 PREHOOK: query: explain vectorization expression @@ -161,6 +176,7 @@ select interval '1-2' year to month - interval_year_month(str1) from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -254,6 +270,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 NULL 2-4 NULL NULL 0-0 NULL NULL 2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0 PREHOOK: query: explain vectorization expression @@ -278,6 +295,7 @@ select interval '1 2:3:4' day to second - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -371,6 +389,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked 
pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL 2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: explain vectorization expression @@ -407,6 +426,7 @@ select dt - interval_day_time(str2) from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -512,6 +532,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56 PREHOOK: query: explain vectorization expression @@ -548,6 +569,7 @@ select ts - interval_day_time(str2) from vector_interval_1 order by ts POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -653,6 +675,7 @@ from vector_interval_1 order by ts POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +ts _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59 PREHOOK: query: explain vectorization expression @@ -671,6 +694,7 @@ select ts - timestamp '2001-01-01 01:02:03' from vector_interval_1 order by ts POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -758,6 +782,7 @@ from vector_interval_1 order by ts POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +ts _c1 _c2 _c3 NULL NULL NULL NULL 2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: explain vectorization expression @@ -776,6 +801,7 @@ select dt - date '2001-01-01' from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -863,6 +889,7 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 NULL NULL NULL NULL 2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 PREHOOK: query: explain vectorization expression @@ -887,6 +914,7 @@ select date '2001-01-01' - ts from vector_interval_1 order by dt POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -980,5 +1008,6 @@ from vector_interval_1 order by dt POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_interval_1 #### A masked pattern was here #### +dt _c1 _c2 _c3 _c4 _c5 _c6 NULL NULL NULL NULL NULL NULL NULL 2001-01-01 0 01:02:03.000000000 0 01:02:03.000000000 0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 diff --git 
ql/src/test/results/clientpositive/vector_like_2.q.out ql/src/test/results/clientpositive/vector_like_2.q.out deleted file mode 100644 index 26ff792..0000000 --- ql/src/test/results/clientpositive/vector_like_2.q.out +++ /dev/null @@ -1,125 +0,0 @@ -PREHOOK: query: drop table if exists foo -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists foo -POSTHOOK: type: DROPTABLE -PREHOOK: query: create temporary table foo (a string) stored as orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@foo -POSTHOOK: query: create temporary table foo (a string) stored as orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@foo -PREHOOK: query: insert into foo values("some foo"),("some bar"),(null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@foo -POSTHOOK: query: insert into foo values("some foo"),("some bar"),(null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@foo -POSTHOOK: Lineage: foo.a SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -select a, a like "%bar" from foo order by a -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -select a, a like "%bar" from foo order by a -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: foo - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:string, 1:ROW__ID:struct] - Select Operator - expressions: a (type: string), (a like '%bar') (type: boolean) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] - selectExpressions: SelectStringColLikeStringScalar(col 0:string) -> 2:boolean - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: a:string - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 184 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select a, a like "%bar" from foo order by a -PREHOOK: type: QUERY -PREHOOK: Input: default@foo -#### A masked pattern was here #### -POSTHOOK: query: select a, a like "%bar" from foo order by a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@foo -#### A masked pattern was here #### -NULL NULL -some bar true -some foo false -PREHOOK: query: select a, a like "%bar" from foo order by a -PREHOOK: type: QUERY -PREHOOK: Input: default@foo -#### A masked pattern was here #### -POSTHOOK: query: select a, a like "%bar" from foo order by a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@foo -#### A masked pattern was here #### -NULL NULL -some bar true -some foo false diff --git ql/src/test/results/clientpositive/vector_order_null.q.out ql/src/test/results/clientpositive/vector_order_null.q.out deleted file mode 100644 index c50e275..0000000 --- ql/src/test/results/clientpositive/vector_order_null.q.out +++ /dev/null @@ -1,1130 +0,0 @@ -PREHOOK: query: create table src_null (a int, b string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_null -POSTHOOK: query: create table src_null (a int, b string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_null -PREHOOK: query: insert into src_null values (1, 'A') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (1, 'A') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a SCRIPT [] -POSTHOOK: Lineage: src_null.b SCRIPT [] -col1 col2 -PREHOOK: query: insert into src_null values (null, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (null, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a EXPRESSION [] -POSTHOOK: Lineage: src_null.b EXPRESSION [] -_col0 _col1 -PREHOOK: query: insert into src_null values (3, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (3, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a SCRIPT [] -POSTHOOK: Lineage: src_null.b EXPRESSION [] -_col0 _col1 -PREHOOK: query: insert into src_null values (2, null) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@src_null -POSTHOOK: query: insert into src_null values (2, null) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.a SCRIPT [] -POSTHOOK: Lineage: src_null.b EXPRESSION [] -_col0 _col1 -PREHOOK: query: insert into src_null values (2, 'A') -PREHOOK: type: QUERY 
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@src_null
-POSTHOOK: query: insert into src_null values (2, 'A')
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@src_null
-POSTHOOK: Lineage: src_null.a SCRIPT []
-POSTHOOK: Lineage: src_null.b SCRIPT []
-col1 col2
-PREHOOK: query: insert into src_null values (2, 'B')
-PREHOOK: type: QUERY
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@src_null
-POSTHOOK: query: insert into src_null values (2, 'B')
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@src_null
-POSTHOOK: Lineage: src_null.a SCRIPT []
-POSTHOOK: Lineage: src_null.b SCRIPT []
-col1 col2
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a asc, b asc
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a asc, b asc
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: ++
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc, b asc
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc, b asc
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-NULL NULL
-1 A
-2 NULL
-2 A
-2 B
-3 NULL
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a desc, b asc
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a desc, b asc
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: -+
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc, b asc
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc, b asc
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-3 NULL
-2 NULL
-2 A
-2 B
-1 A
-NULL NULL
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
- sort order: ++
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc, a asc nulls last
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-2 NULL
-3 NULL
-NULL NULL
-1 A
-2 A
-2 B
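[Note, added for review and not part of the diff: the golden results above illustrate Hive's default null ordering — NULLS FIRST for ASC keys, NULLS LAST for DESC keys — plus an explicit NULLS LAST override. The ReduceSink "sort order" string (++, -+) only encodes the per-key direction; null placement is carried in the binary-sortable key encoding rather than shown in the plan. A minimal illustration against the src_null table defined above (hypothetical session, not from this file):

  SELECT x.* FROM src_null x ORDER BY a ASC;             -- NULLs sort first (ASC default)
  SELECT x.* FROM src_null x ORDER BY a DESC;            -- NULLs sort last (DESC default)
  SELECT x.* FROM src_null x ORDER BY a ASC NULLS LAST;  -- explicit override
]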
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b desc, a asc
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b desc, a asc
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
- sort order: -+
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc, a asc
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-2 B
-1 A
-2 A
-NULL NULL
-2 NULL
-3 NULL
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: ++
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first, b asc
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-NULL NULL
-1 A
-2 NULL
-2 A
-2 B
-3 NULL
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: -+
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first, b asc
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-NULL NULL
-3 NULL
-2 NULL
-2 A
-2 B
-1 A
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b asc nulls last, a
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b asc nulls last, a
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
- sort order: ++
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last, a
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-1 A
-2 A
-2 B
-NULL NULL
-2 NULL
-3 NULL
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b desc nulls last, a
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b desc nulls last, a
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
- sort order: -+
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-2 B
-1 A
-2 A
-NULL NULL
-2 NULL
-3 NULL
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: +-
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-1 A
-2 B
-2 A
-2 NULL
-3 NULL
-NULL NULL
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
- sort order: --
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-2 B
-2 A
-1 A
-3 NULL
-2 NULL
-NULL NULL
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:string, 2:ROW__ID:struct]
- Select Operator
- expressions: a (type: int), b (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
- sort order: ++
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: a:int, b:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_null
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_null
-#### A masked pattern was here ####
-x.a x.b
-2 NULL
-3 NULL
-NULL NULL
-1 A
-2 A
-2 B
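[Note, added for review and not part of the diff: the next deleted golden file exercises windowed aggregates over DECIMAL columns, building parallel tables at decimal(15,2) and decimal(7,2). Both precisions fit Hive's DECIMAL_64 fast path (precision up to 18, values held as scaled 64-bit longs), which is why the plans below report column types like c1:decimal(15,2)/DECIMAL_64 and sums via VectorUDAFSumDecimal64ToDecimal into a decimal(25,2) result. A hedged sketch of the representation, assuming scale 2:

  -- 12.34 is held as the long 1234; vectorized arithmetic stays in
  -- long form until the result type can no longer fit 64 bits
  CREATE TABLE t (c1 decimal(15,2));  -- eligible for DECIMAL_64
  SELECT sum(c1) FROM t;              -- accumulated into decimal(25,2)
]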
diff --git ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out
deleted file mode 100644
index fe68e5c..0000000
--- ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out
+++ /dev/null
@@ -1,2376 +0,0 @@
-PREHOOK: query: DROP TABLE IF EXISTS e011_01
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE IF EXISTS e011_01
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: DROP TABLE IF EXISTS e011_02
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE IF EXISTS e011_02
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: DROP TABLE IF EXISTS e011_03
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE IF EXISTS e011_03
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: CREATE TABLE e011_01 (
- c1 decimal(15,2),
- c2 decimal(15,2))
- STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@e011_01
-POSTHOOK: query: CREATE TABLE e011_01 (
- c1 decimal(15,2),
- c2 decimal(15,2))
- STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@e011_01
-PREHOOK: query: CREATE TABLE e011_02 (
- c1 decimal(15,2),
- c2 decimal(15,2))
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@e011_02
-POSTHOOK: query: CREATE TABLE e011_02 (
- c1 decimal(15,2),
- c2 decimal(15,2))
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@e011_02
-PREHOOK: query: CREATE TABLE e011_03 (
- c1 decimal(15,2),
- c2 decimal(15,2))
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@e011_03
-POSTHOOK: query: CREATE TABLE e011_03 (
- c1 decimal(15,2),
- c2 decimal(15,2))
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@e011_03
-PREHOOK: query: CREATE TABLE e011_01_small (
- c1 decimal(7,2),
- c2 decimal(7,2))
- STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@e011_01_small
-POSTHOOK: query: CREATE TABLE e011_01_small (
- c1 decimal(7,2),
- c2 decimal(7,2))
- STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@e011_01_small
-PREHOOK: query: CREATE TABLE e011_02_small (
- c1 decimal(7,2),
- c2 decimal(7,2))
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@e011_02_small
-POSTHOOK: query: CREATE TABLE e011_02_small (
- c1 decimal(7,2),
- c2 decimal(7,2))
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@e011_02_small
-PREHOOK: query: CREATE TABLE e011_03_small (
- c1 decimal(7,2),
- c2 decimal(7,2))
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@e011_03_small
-POSTHOOK: query: CREATE TABLE e011_03_small (
- c1 decimal(7,2),
- c2 decimal(7,2))
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@e011_03_small
-PREHOOK: query: LOAD DATA
- LOCAL INPATH '../../data/files/e011_01.txt'
- OVERWRITE
- INTO TABLE e011_01
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@e011_01
-POSTHOOK: query: LOAD DATA
- LOCAL INPATH '../../data/files/e011_01.txt'
- OVERWRITE
- INTO TABLE e011_01
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@e011_01
-PREHOOK: query: INSERT INTO TABLE e011_02
- SELECT c1, c2
- FROM e011_01
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01
-PREHOOK: Output: default@e011_02
-POSTHOOK: query: INSERT INTO TABLE e011_02
- SELECT c1, c2
- FROM e011_01
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01
-POSTHOOK: Output: default@e011_02
-POSTHOOK: Lineage: e011_02.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ]
-POSTHOOK: Lineage: e011_02.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ]
-c1 c2
-PREHOOK: query: INSERT INTO TABLE e011_03
- SELECT c1, c2
- FROM e011_01
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01
-PREHOOK: Output: default@e011_03
-POSTHOOK: query: INSERT INTO TABLE e011_03
- SELECT c1, c2
- FROM e011_01
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01
-POSTHOOK: Output: default@e011_03
-POSTHOOK: Lineage: e011_03.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ]
-POSTHOOK: Lineage: e011_03.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ]
-c1 c2
-PREHOOK: query: LOAD DATA
- LOCAL INPATH '../../data/files/e011_01.txt'
- OVERWRITE
- INTO TABLE e011_01_small
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@e011_01_small
-POSTHOOK: query: LOAD DATA
- LOCAL INPATH '../../data/files/e011_01.txt'
- OVERWRITE
- INTO TABLE e011_01_small
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@e011_01_small
-PREHOOK: query: INSERT INTO TABLE e011_02_small
- SELECT c1, c2
- FROM e011_01_small
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01_small
-PREHOOK: Output: default@e011_02_small
-POSTHOOK: query: INSERT INTO TABLE e011_02_small
- SELECT c1, c2
- FROM e011_01_small
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01_small
-POSTHOOK: Output: default@e011_02_small
-POSTHOOK: Lineage: e011_02_small.c1 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c1, type:decimal(7,2), comment:null), ]
-POSTHOOK: Lineage: e011_02_small.c2 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c2, type:decimal(7,2), comment:null), ]
-c1 c2
-PREHOOK: query: INSERT INTO TABLE e011_03_small
- SELECT c1, c2
- FROM e011_01_small
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01_small
-PREHOOK: Output: default@e011_03_small
-POSTHOOK: query: INSERT INTO TABLE e011_03_small
- SELECT c1, c2
- FROM e011_01_small
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01_small
-POSTHOOK: Output: default@e011_03_small
-POSTHOOK: Lineage: e011_03_small.c1 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c1, type:decimal(7,2), comment:null), ]
-POSTHOOK: Lineage: e011_03_small.c2 SIMPLE [(e011_01_small)e011_01_small.FieldSchema(name:c2, type:decimal(7,2), comment:null), ]
-c1 c2
-PREHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01
-PREHOOK: Output: default@e011_01
-#### A masked pattern was here ####
-POSTHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01
-POSTHOOK: Output: default@e011_01
-#### A masked pattern was here ####
-_c0 _c1
-PREHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_02
-PREHOOK: Output: default@e011_02
-#### A masked pattern was here ####
-POSTHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_02
-POSTHOOK: Output: default@e011_02
-#### A masked pattern was here ####
-_c0 _c1
-PREHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_03
-PREHOOK: Output: default@e011_03
-#### A masked pattern was here ####
-POSTHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_03
-POSTHOOK: Output: default@e011_03
-#### A masked pattern was here ####
-_c0 _c1
-PREHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01_small
-PREHOOK: Output: default@e011_01_small
-#### A masked pattern was here ####
-POSTHOOK: query: ANALYZE TABLE e011_01_small COMPUTE STATISTICS FOR COLUMNS
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01_small
-POSTHOOK: Output: default@e011_01_small
-#### A masked pattern was here ####
-_c0 _c1
-PREHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_02_small
-PREHOOK: Output: default@e011_02_small
-#### A masked pattern was here ####
-POSTHOOK: query: ANALYZE TABLE e011_02_small COMPUTE STATISTICS FOR COLUMNS
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_02_small
-POSTHOOK: Output: default@e011_02_small
-#### A masked pattern was here ####
-_c0 _c1
-PREHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_03_small
-PREHOOK: Output: default@e011_03_small
-#### A masked pattern was here ####
-POSTHOOK: query: ANALYZE TABLE e011_03_small COMPUTE STATISTICS FOR COLUMNS
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_03_small
-POSTHOOK: Output: default@e011_03_small
-#### A masked pattern was here ####
-_c0 _c1
-PREHOOK: query: explain vectorization detail
-select sum(sum(c1)) over() from e011_01
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select sum(sum(c1)) over() from e011_01
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: e011_01
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:c1:decimal(15,2)/DECIMAL_64, 1:c2:decimal(15,2)/DECIMAL_64, 2:ROW__ID:struct]
- Select Operator
- expressions: c1 (type: decimal(15,2))
- outputColumnNames: c1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(c1)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> decimal(25,2)
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: decimal(25,2))
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0]
- dataColumns: c1:decimal(15,2)/DECIMAL_64, c2:decimal(15,2)/DECIMAL_64
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:_col0:decimal(25,2)]
- Reduce Output Operator
- key expressions: 0 (type: int)
- sort order: +
- Map-reduce partition columns: 0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: decimal(25,2))
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 1
- includeColumns: [0]
- dataColumns: _col0:decimal(25,2)
- partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint]
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: decimal(25,2))
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col0: decimal(25,2)
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: 0 ASC NULLS FIRST
- partition by: 0
- raw input shape:
- window functions:
- window function definition
- alias: sum_window_0
- arguments: _col0
- name: sum
- window function: GenericUDAFSumHiveDecimal
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: sum_window_0 (type: decimal(35,2))
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select sum(sum(c1)) over() from e011_01
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(sum(c1)) over() from e011_01
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01
-#### A masked pattern was here ####
-_c0
-16.00
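[Note, added for review and not part of the diff: sum(sum(c1)) OVER () is an aggregate-of-aggregate. It is legal because the window function is evaluated after the (here implicit, global) GROUP BY: the inner sum(c1) collapses the table to a single row, and the outer window sum over that single row returns the same total (16.00 above). The plan reflects this as two stages — Stage-1 computes the group-by sum, Stage-2 runs the windowing PTF over its output. An equivalent spelling, illustrative only:

  SELECT sum(s) OVER ()
  FROM (SELECT sum(c1) AS s FROM e011_01) t;
]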
-PREHOOK: query: explain vectorization detail
-select sum(sum(c1)) over(
- partition by c2 order by c1)
- from e011_01
- group by e011_01.c1, e011_01.c2
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select sum(sum(c1)) over(
- partition by c2 order by c1)
- from e011_01
- group by e011_01.c1, e011_01.c2
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: e011_01
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:c1:decimal(15,2)/DECIMAL_64, 1:c2:decimal(15,2)/DECIMAL_64, 2:ROW__ID:struct]
- Select Operator
- expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
- outputColumnNames: c1, c2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(c1)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> decimal(25,2)
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> 3:decimal(15,2), ConvertDecimal64ToDecimal(col 1:decimal(15,2)/DECIMAL_64) -> 4:decimal(15,2)
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
- sort order: ++
- Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: decimal(25,2))
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: c1:decimal(15,2)/DECIMAL_64, c2:decimal(15,2)/DECIMAL_64
- partitionColumnCount: 0
- scratchColumnTypeNames: [decimal(15,2), decimal(15,2)]
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)]
- Reduce Output Operator
- key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2))
- sort order: ++
- Map-reduce partition columns: _col1 (type: decimal(15,2))
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: decimal(25,2))
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 3
- includeColumns: [0, 1, 2]
- dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2)
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2))
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2)
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col0 ASC NULLS FIRST
- partition by: _col1
- raw input shape:
- window functions:
- window function definition
- alias: sum_window_0
- arguments: _col2
- name: sum
- window function: GenericUDAFSumHiveDecimal
- window frame: RANGE PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: sum_window_0 (type: decimal(35,2))
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select sum(sum(c1)) over(
- partition by c2 order by c1)
- from e011_01
- group by e011_01.c1, e011_01.c2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(sum(c1)) over(
- partition by c2 order by c1)
- from e011_01
- group by e011_01.c1, e011_01.c2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01
-#### A masked pattern was here ####
-_c0
-1.00
-3.00
-5.00
-7.00
-PREHOOK: query: explain vectorization detail
-select sum(sum(e011_01.c1)) over(
- partition by e011_01.c2 order by e011_01.c1)
- from e011_01
- join e011_03 on e011_01.c1 = e011_03.c1
- group by e011_01.c1, e011_01.c2
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select sum(sum(e011_01.c1)) over(
- partition by e011_01.c2 order by e011_01.c1)
- from e011_01
- join e011_03 on e011_01.c1 = e011_03.c1
- group by e011_01.c1, e011_01.c2
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: e011_01
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: c1 is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: decimal(15,2))
- sort order: +
- Map-reduce partition columns: _col0 (type: decimal(15,2))
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: decimal(15,2))
- TableScan
- alias: e011_03
- Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: c1 is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: c1 (type: decimal(15,2))
- outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: decimal(15,2))
- sort order: +
- Map-reduce partition columns: _col0 (type: decimal(15,2))
- Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: decimal(15,2))
- 1 _col0 (type: decimal(15,2))
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col0)
- keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)]
- Reduce Output Operator
- key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
- sort order: ++
- Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 4 Data size: 13
Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)] - Reduce Output Operator - key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col1 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS 
FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(35,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(e011_01.c1)) over( - partition by e011_01.c2 order by e011_01.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_01.c1, e011_01.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -PREHOOK: Input: default@e011_03 -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(e011_01.c1)) over( - partition by e011_01.c2 order by e011_01.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_01.c1, e011_01.c2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@e011_01 -POSTHOOK: Input: default@e011_03 -#### A masked pattern was here #### -_c0 -1.00 -3.00 -5.00 -7.00 -PREHOOK: query: explain vectorization detail -select sum(sum(e011_01.c1)) over( - partition by e011_03.c2 order by e011_03.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c1, e011_03.c2 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select sum(sum(e011_01.c1)) over( - partition by e011_03.c2 order by e011_03.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c1, e011_03.c2 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: e011_03 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(15,2)) - TableScan - alias: e011_01 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: decimal(15,2)) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce 
Output Operator - key expressions: _col0 (type: decimal(15,2)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Vectorization: - enabled: false - enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: decimal(15,2)) - 1 _col0 (type: decimal(15,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)] - Reduce Output Operator - key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - 
TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:decimal(25,2)] - Reduce Output Operator - key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) - sort order: ++ - Map-reduce partition columns: _col1 (type: decimal(15,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(25,2)) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1, 2] - dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:decimal(25,2) - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: decimal(35,2)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(sum(e011_01.c1)) over( - partition by e011_03.c2 order by e011_03.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c1, e011_03.c2 -PREHOOK: type: QUERY -PREHOOK: Input: default@e011_01 -PREHOOK: Input: default@e011_03 -#### A masked pattern was here #### -POSTHOOK: query: select sum(sum(e011_01.c1)) over( - partition by e011_03.c2 order by e011_03.c1) - from e011_01 - join e011_03 on e011_01.c1 = e011_03.c1 - group by e011_03.c1, e011_03.c2 
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01
-PREHOOK: Input: default@e011_03
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(sum(e011_01.c1)) over(
-  partition by e011_03.c2 order by e011_03.c1)
-  from e011_01
-  join e011_03 on e011_01.c1 = e011_03.c1
-  group by e011_03.c1, e011_03.c2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01
-POSTHOOK: Input: default@e011_03
-#### A masked pattern was here ####
-_c0
-1.00
-3.00
-5.00
-7.00
-PREHOOK: query: explain vectorization detail
-select sum(corr(e011_01.c1, e011_03.c1))
-  over(partition by e011_01.c2 order by e011_03.c2)
-  from e011_01
-  join e011_03 on e011_01.c1 = e011_03.c1
-  group by e011_03.c2, e011_01.c2
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select sum(corr(e011_01.c1, e011_03.c1))
-  over(partition by e011_01.c2 order by e011_03.c2)
-  from e011_01
-  join e011_03 on e011_01.c1 = e011_03.c1
-  group by e011_03.c2, e011_01.c2
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: e011_01
-            Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(15,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(15,2))
-                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: decimal(15,2))
-          TableScan
-            alias: e011_03
-            Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(15,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(15,2))
-                  Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: decimal(15,2))
-      Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          keys:
-            0 _col0 (type: decimal(15,2))
-            1 _col0 (type: decimal(15,2))
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: corr(_col0, _col2)
-            keys: _col1 (type: decimal(15,2)), _col3 (type: decimal(15,2))
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:decimal(15,2), 1:_col1:decimal(15,2), 2:_col2:struct]
-            Reduce Output Operator
-              key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: decimal(15,2))
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col2 (type: struct)
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 3
-              includeColumns: [0, 1, 2]
-              dataColumns: _col0:decimal(15,2), _col1:decimal(15,2), _col2:struct
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: corr(VALUE._col0)
-          keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-            PTF Operator
-              Function definitions:
-                  Input definition
-                    input alias: ptf_0
-                    output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double
-                    type: WINDOWING
-                  Windowing table definition
-                    input alias: ptf_1
-                    name: windowingtablefunction
-                    order by: _col0 ASC NULLS FIRST
-                    partition by: _col1
-                    raw input shape:
-                    window functions:
-                        window function definition
-                          alias: sum_window_0
-                          arguments: _col2
-                          name: sum
-                          window function: GenericUDAFSumDouble
-                          window frame: RANGE PRECEDING(MAX)~CURRENT
-              Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: sum_window_0 (type: double)
-                outputColumnNames: _col0
-                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select sum(corr(e011_01.c1, e011_03.c1))
-  over(partition by e011_01.c2 order by e011_03.c2)
-  from e011_01
-  join e011_03 on e011_01.c1 = e011_03.c1
-  group by e011_03.c2, e011_01.c2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01
-PREHOOK: Input: default@e011_03
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(corr(e011_01.c1, e011_03.c1))
-  over(partition by e011_01.c2 order by e011_03.c2)
-  from e011_01
-  join e011_03 on e011_01.c1 = e011_03.c1
-  group by e011_03.c2, e011_01.c2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01
-POSTHOOK: Input: default@e011_03
-#### A masked pattern was here ####
-sum_window_0
-NULL
-NULL
-NULL
-NULL
-PREHOOK: query: explain vectorization detail
-select sum(sum(c1)) over() from e011_01_small
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select sum(sum(c1)) over() from e011_01_small
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: e011_01_small
-            Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:c1:decimal(7,2)/DECIMAL_64, 1:c2:decimal(7,2)/DECIMAL_64, 2:ROW__ID:struct]
-            Select Operator
-              expressions: c1 (type: decimal(7,2))
-              outputColumnNames: c1
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumnNums: [0]
-              Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: sum(c1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumDecimal64(col 0:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64
-                    className: VectorGroupByOperator
-                    groupByMode: HASH
-                    native: false
-                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0]
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: decimal(17,2))
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: [DECIMAL_64]
-          featureSupportInUse: [DECIMAL_64]
-          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 2
-              includeColumns: [0]
-              dataColumns: c1:decimal(7,2)/DECIMAL_64, c2:decimal(7,2)/DECIMAL_64
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:decimal(17,2)]
-            Reduce Output Operator
-              key expressions: 0 (type: int)
-              sort order: +
-              Map-reduce partition columns: 0 (type: int)
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: decimal(17,2))
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 1
-              includeColumns: [0]
-              dataColumns: _col0:decimal(17,2)
-              partitionColumnCount: 0
-              scratchColumnTypeNames: [bigint, bigint]
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Select Operator
-          expressions: VALUE._col0 (type: decimal(17,2))
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-          PTF Operator
-            Function definitions:
-                Input definition
-                  input alias: ptf_0
-                  output shape: _col0: decimal(17,2)
-                  type: WINDOWING
-                Windowing table definition
-                  input alias: ptf_1
-                  name: windowingtablefunction
-                  order by: 0 ASC NULLS FIRST
-                  partition by: 0
-                  raw input shape:
-                  window functions:
-                      window function definition
-                        alias: sum_window_0
-                        arguments: _col0
-                        name: sum
-                        window function: GenericUDAFSumHiveDecimal
-                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
-            Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: sum_window_0 (type: decimal(27,2))
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select sum(sum(c1)) over() from e011_01_small
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01_small
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(sum(c1)) over() from e011_01_small
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01_small
-#### A masked pattern was here ####
-_c0
-16.00
-PREHOOK: query: explain vectorization detail
-select sum(sum(c1)) over(
-  partition by c2 order by c1)
-  from e011_01_small
-  group by e011_01_small.c1, e011_01_small.c2
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select sum(sum(c1)) over(
-  partition by c2 order by c1)
-  from e011_01_small
-  group by e011_01_small.c1, e011_01_small.c2
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: e011_01_small
-            Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:c1:decimal(7,2)/DECIMAL_64, 1:c2:decimal(7,2)/DECIMAL_64, 2:ROW__ID:struct]
-            Select Operator
-              expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2))
-              outputColumnNames: c1, c2
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumnNums: [0, 1]
-              Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: sum(c1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumDecimal64(col 0:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64
-                    className: VectorGroupByOperator
-                    groupByMode: HASH
-                    keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(7,2)/DECIMAL_64) -> 3:decimal(7,2), ConvertDecimal64ToDecimal(col 1:decimal(7,2)/DECIMAL_64) -> 4:decimal(7,2)
-                    native: false
-                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0]
-                keys: c1 (type: decimal(7,2)), c2 (type: decimal(7,2))
-                mode: hash
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2))
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2))
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col2 (type: decimal(17,2))
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: [DECIMAL_64]
-          featureSupportInUse: [DECIMAL_64]
-          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 2
-              includeColumns: [0, 1]
-              dataColumns: c1:decimal(7,2)/DECIMAL_64, c2:decimal(7,2)/DECIMAL_64
-              partitionColumnCount: 0
-              scratchColumnTypeNames: [decimal(7,2), decimal(7,2)]
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0)
-          keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2))
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)]
-            Reduce Output Operator
-              key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2))
-              sort order: ++
-              Map-reduce partition columns: _col1 (type: decimal(7,2))
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col2 (type: decimal(17,2))
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 3
-              includeColumns: [0, 1, 2]
-              dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2)
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2))
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-          PTF Operator
-            Function definitions:
-                Input definition
-                  input alias: ptf_0
-                  output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2)
-                  type: WINDOWING
-                Windowing table definition
-                  input alias: ptf_1
-                  name: windowingtablefunction
-                  order by: _col0 ASC NULLS FIRST
-                  partition by: _col1
-                  raw input shape:
-                  window functions:
-                      window function definition
-                        alias: sum_window_0
-                        arguments: _col2
-                        name: sum
-                        window function: GenericUDAFSumHiveDecimal
-                        window frame: RANGE PRECEDING(MAX)~CURRENT
-            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: sum_window_0 (type: decimal(27,2))
-              outputColumnNames: _col0
-              Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select sum(sum(c1)) over(
-  partition by c2 order by c1)
-  from e011_01_small
-  group by e011_01_small.c1, e011_01_small.c2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01_small
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(sum(c1)) over(
-  partition by c2 order by c1)
-  from e011_01_small
-  group by e011_01_small.c1, e011_01_small.c2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01_small
-#### A masked pattern was here ####
-_c0
-1.00
-3.00
-5.00
-7.00
-PREHOOK: query: explain vectorization detail
-select sum(sum(e011_01_small.c1)) over(
-  partition by e011_01_small.c2 order by e011_01_small.c1)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_01_small.c1, e011_01_small.c2
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select sum(sum(e011_01_small.c1)) over(
-  partition by e011_01_small.c2 order by e011_01_small.c1)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_01_small.c1, e011_01_small.c2
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: e011_01_small
-            Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2))
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(7,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(7,2))
-                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: decimal(7,2))
-          TableScan
-            alias: e011_03_small
-            Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(7,2))
-                outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(7,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(7,2))
-                  Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-      Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          keys:
-            0 _col0 (type: decimal(7,2))
-            1 _col0 (type: decimal(7,2))
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: sum(_col0)
-            keys: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2))
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)]
-            Reduce Output Operator
-              key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2))
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2))
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col2 (type: decimal(17,2))
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 3
-              includeColumns: [0, 1, 2]
-              dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2)
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0)
-          keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2))
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)]
-            Reduce Output Operator
-              key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2))
-              sort order: ++
-              Map-reduce partition columns: _col1 (type: decimal(7,2))
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col2 (type: decimal(17,2))
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 3
-              includeColumns: [0, 1, 2]
-              dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2)
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2))
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-          PTF Operator
-            Function definitions:
-                Input definition
-                  input alias: ptf_0
-                  output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2)
-                  type: WINDOWING
-                Windowing table definition
-                  input alias: ptf_1
-                  name: windowingtablefunction
-                  order by: _col0 ASC NULLS FIRST
-                  partition by: _col1
-                  raw input shape:
-                  window functions:
-                      window function definition
-                        alias: sum_window_0
-                        arguments: _col2
-                        name: sum
-                        window function: GenericUDAFSumHiveDecimal
-                        window frame: RANGE PRECEDING(MAX)~CURRENT
-            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: sum_window_0 (type: decimal(27,2))
-              outputColumnNames: _col0
-              Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select sum(sum(e011_01_small.c1)) over(
-  partition by e011_01_small.c2 order by e011_01_small.c1)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_01_small.c1, e011_01_small.c2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01_small
-PREHOOK: Input: default@e011_03_small
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(sum(e011_01_small.c1)) over(
-  partition by e011_01_small.c2 order by e011_01_small.c1)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_01_small.c1, e011_01_small.c2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01_small
-POSTHOOK: Input: default@e011_03_small
-#### A masked pattern was here ####
-_c0
-1.00
-3.00
-5.00
-7.00
-PREHOOK: query: explain vectorization detail
-select sum(sum(e011_01_small.c1)) over(
-  partition by e011_03_small.c2 order by e011_03_small.c1)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_03_small.c1, e011_03_small.c2
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select sum(sum(e011_01_small.c1)) over(
-  partition by e011_03_small.c2 order by e011_03_small.c1)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_03_small.c1, e011_03_small.c2
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: e011_03_small
-            Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2))
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(7,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(7,2))
-                  Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: decimal(7,2))
-          TableScan
-            alias: e011_01_small
-            Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(7,2))
-                outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(7,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(7,2))
-                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-      Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          keys:
-            0 _col0 (type: decimal(7,2))
-            1 _col0 (type: decimal(7,2))
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: sum(_col2)
-            keys: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2))
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)]
-            Reduce Output Operator
-              key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2))
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2))
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col2 (type: decimal(17,2))
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 3
-              includeColumns: [0, 1, 2]
-              dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2)
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0)
-          keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2))
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:decimal(17,2)]
-            Reduce Output Operator
-              key expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2))
-              sort order: ++
-              Map-reduce partition columns: _col1 (type: decimal(7,2))
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col2 (type: decimal(17,2))
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 3
-              includeColumns: [0, 1, 2]
-              dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:decimal(17,2)
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2))
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
-          PTF Operator
-            Function definitions:
-                Input definition
-                  input alias: ptf_0
-                  output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: decimal(17,2)
-                  type: WINDOWING
-                Windowing table definition
-                  input alias: ptf_1
-                  name: windowingtablefunction
-                  order by: _col0 ASC NULLS FIRST
-                  partition by: _col1
-                  raw input shape:
-                  window functions:
-                      window function definition
-                        alias: sum_window_0
-                        arguments: _col2
-                        name: sum
-                        window function: GenericUDAFSumHiveDecimal
-                        window frame: RANGE PRECEDING(MAX)~CURRENT
-            Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: sum_window_0 (type: decimal(27,2))
-              outputColumnNames: _col0
-              Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select sum(sum(e011_01_small.c1)) over(
-  partition by e011_03_small.c2 order by e011_03_small.c1)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_03_small.c1, e011_03_small.c2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01_small
-PREHOOK: Input: default@e011_03_small
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(sum(e011_01_small.c1)) over(
-  partition by e011_03_small.c2 order by e011_03_small.c1)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_03_small.c1, e011_03_small.c2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01_small
-POSTHOOK: Input: default@e011_03_small
-#### A masked pattern was here ####
-_c0
-1.00
-3.00
-5.00
-7.00
-PREHOOK: query: explain vectorization detail
-select sum(corr(e011_01_small.c1, e011_03_small.c1))
-  over(partition by e011_01_small.c2 order by e011_03_small.c2)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_03_small.c2, e011_01_small.c2
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select sum(corr(e011_01_small.c1, e011_03_small.c1))
-  over(partition by e011_01_small.c2 order by e011_03_small.c2)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_03_small.c2, e011_01_small.c2
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: e011_01_small
-            Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2))
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(7,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(7,2))
-                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: decimal(7,2))
-          TableScan
-            alias: e011_03_small
-            Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2))
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(7,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(7,2))
-                  Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: decimal(7,2))
-      Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          keys:
-            0 _col0 (type: decimal(7,2))
-            1 _col0 (type: decimal(7,2))
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: corr(_col0, _col2)
-            keys: _col1 (type: decimal(7,2)), _col3 (type: decimal(7,2))
-            mode: hash
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:decimal(7,2), 1:_col1:decimal(7,2), 2:_col2:struct]
-            Reduce Output Operator
-              key expressions: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2))
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: decimal(7,2))
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col2 (type: struct)
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 3
-              includeColumns: [0, 1, 2]
-              dataColumns: _col0:decimal(7,2), _col1:decimal(7,2), _col2:struct
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: corr(VALUE._col0)
-          keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: decimal(7,2))
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)), _col2 (type: double)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-            PTF Operator
-              Function definitions:
-                  Input definition
-                    input alias: ptf_0
-                    output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: double
-                    type: WINDOWING
-                  Windowing table definition
-                    input alias: ptf_1
-                    name: windowingtablefunction
-                    order by: _col0 ASC NULLS FIRST
-                    partition by: _col1
-                    raw input shape:
-                    window functions:
-                        window function definition
-                          alias: sum_window_0
-                          arguments: _col2
-                          name: sum
-                          window function: GenericUDAFSumDouble
-                          window frame: RANGE PRECEDING(MAX)~CURRENT
-              Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: sum_window_0 (type: double)
-                outputColumnNames: _col0
-                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select sum(corr(e011_01_small.c1, e011_03_small.c1))
-  over(partition by e011_01_small.c2 order by e011_03_small.c2)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_03_small.c2, e011_01_small.c2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@e011_01_small
-PREHOOK: Input: default@e011_03_small
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(corr(e011_01_small.c1, e011_03_small.c1))
-  over(partition by e011_01_small.c2 order by e011_03_small.c2)
-  from e011_01_small
-  join e011_03_small on e011_01_small.c1 = e011_03_small.c1
-  group by e011_03_small.c2, e011_01_small.c2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@e011_01_small
-POSTHOOK: Input: default@e011_03_small
-#### A masked pattern was here ####
-sum_window_0
-NULL
-NULL
-NULL
-NULL
diff --git ql/src/test/results/clientpositive/vector_udf2.q.out ql/src/test/results/clientpositive/vector_udf2.q.out
deleted file mode 100644
index 222a901..0000000
--- ql/src/test/results/clientpositive/vector_udf2.q.out
+++ /dev/null
@@ -1,188 +0,0 @@
-PREHOOK: query: drop table varchar_udf_2
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table varchar_udf_2
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@varchar_udf_2
-POSTHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@varchar_udf_2
-PREHOOK: query: insert overwrite table varchar_udf_2
-  select key, value, key, value from src where key = '238' limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@varchar_udf_2
-POSTHOOK: query: insert overwrite table varchar_udf_2
-  select key, value, key, value from src where key = '238' limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@varchar_udf_2
-POSTHOOK: Lineage: varchar_udf_2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: varchar_udf_2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: varchar_udf_2.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: varchar_udf_2.c4 EXPRESSION
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain vectorization expression -select - c1 LIKE '%38%', - c2 LIKE 'val_%', - c3 LIKE '%38', - c1 LIKE '%3x8%', - c2 LIKE 'xval_%', - c3 LIKE '%x38' -from varchar_udf_2 limit 1 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression -select - c1 LIKE '%38%', - c2 LIKE 'val_%', - c3 LIKE '%38', - c1 LIKE '%3x8%', - c2 LIKE 'xval_%', - c3 LIKE '%x38' -from varchar_udf_2 limit 1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: varchar_udf_2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Select Operator - expressions: (c1 like '%38%') (type: boolean), (c2 like 'val_%') (type: boolean), (c3 like '%38') (type: boolean), (c1 like '%3x8%') (type: boolean), (c2 like 'xval_%') (type: boolean), (c3 like '%x38') (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10] - selectExpressions: SelectStringColLikeStringScalar(col 0:string) -> 5:boolean, SelectStringColLikeStringScalar(col 1:string) -> 6:boolean, SelectStringColLikeStringScalar(col 2:varchar(10)) -> 7:boolean, SelectStringColLikeStringScalar(col 0:string) -> 8:boolean, SelectStringColLikeStringScalar(col 1:string) -> 9:boolean, SelectStringColLikeStringScalar(col 2:varchar(10)) -> 10:boolean - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - ListSink - -PREHOOK: query: select - c1 LIKE '%38%', - c2 LIKE 'val_%', - c3 LIKE '%38', - c1 LIKE '%3x8%', - c2 LIKE 'xval_%', - c3 LIKE '%x38' -from varchar_udf_2 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_2 -#### A masked pattern was here #### -POSTHOOK: query: select - c1 LIKE '%38%', - c2 LIKE 'val_%', - c3 LIKE '%38', - c1 LIKE '%3x8%', - c2 LIKE 'xval_%', - c3 LIKE '%x38' -from varchar_udf_2 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_2 -#### A masked pattern was here #### -true true true false false false -PREHOOK: query: drop table varchar_udf_2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@varchar_udf_2 -PREHOOK: Output: default@varchar_udf_2 -POSTHOOK: query: drop 
table varchar_udf_2 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@varchar_udf_2 -POSTHOOK: Output: default@varchar_udf_2 -PREHOOK: query: create temporary table HIVE_14349 (a string) stored as orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@HIVE_14349 -POSTHOOK: query: create temporary table HIVE_14349 (a string) stored as orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@HIVE_14349 -PREHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@hive_14349 -POSTHOOK: query: insert into HIVE_14349 values('XYZa'), ('badXYZa') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@hive_14349 -POSTHOOK: Lineage: hive_14349.a SCRIPT [] -PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%' -PREHOOK: type: QUERY -PREHOOK: Input: default@hive_14349 -#### A masked pattern was here #### -POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a%' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hive_14349 -#### A masked pattern was here #### -XYZa -PREHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@hive_14349 -POSTHOOK: query: insert into HIVE_14349 values ('XYZab'), ('XYZabBAD'), ('badXYZab'), ('badXYZabc') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@hive_14349 -POSTHOOK: Lineage: hive_14349.a SCRIPT [] -PREHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_' -PREHOOK: type: QUERY -PREHOOK: Input: default@hive_14349 -#### A masked pattern was here #### -POSTHOOK: query: select * from HIVE_14349 where a LIKE 'XYZ%a_' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hive_14349 -#### A masked pattern was here #### -XYZab -PREHOOK: query: drop table HIVE_14349 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@hive_14349 -PREHOOK: Output: default@hive_14349 -POSTHOOK: query: drop table HIVE_14349 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@hive_14349 -POSTHOOK: Output: default@hive_14349 diff --git ql/src/test/results/clientpositive/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/vectorization_nested_udf.q.out index bca2d2a..090c6c6 100644 --- ql/src/test/results/clientpositive/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/vectorization_nested_udf.q.out @@ -1,3 +1,97 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator 
+ expressions: abs(ctinyint) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: FuncAbsLongToLong(col 0:tinyint) -> 13:int + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 13:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 50e9b0e..b89fa51 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -51,6 +51,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 
Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -87,6 +88,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -136,7 +143,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -154,7 +161,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -189,6 +196,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -225,6 +233,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -232,13 +246,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -261,6 +275,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) 
outputColumnNames: _col0, _col1 @@ -301,6 +316,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -339,14 +360,14 @@ from alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -5110 4607 -PREHOOK: query: explain vectorization expression +4086 3583 +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -369,6 +390,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -409,6 +431,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -466,10 +494,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -489,6 +517,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -518,6 +547,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 
+ includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -536,10 +571,10 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -559,6 +594,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -588,6 +624,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -606,10 +648,10 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -629,6 +671,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -658,6 +701,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -694,10 +743,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -717,6 +766,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -746,6 +796,12 @@ STAGE PLANS: allNative: false 
usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -764,10 +820,10 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -787,6 +843,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -816,6 +873,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -834,10 +897,10 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -857,6 +920,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 @@ -886,6 +950,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -904,3 +974,227 @@ POSTHOOK: Input: default@test_2 NULL NULL 4 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 248718130534 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND 
cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index b743e64..5e25c47 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + /** * This class supports string and binary data by value reference -- i.e. each field is @@ -93,7 +95,12 @@ public void reset() { initBuffer(0); } - /** Set a field by reference. + /** + * Set a field by reference. + * + * This is a FAST version that assumes the caller has checked to make sure the sourceBuf + * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. Only the output entry fields will be set by this method. * * @param elementNum index within column vector to set * @param sourceBuf container of source data @@ -161,6 +168,10 @@ public int bufferSize() { * DO NOT USE this method unless it's not practical to set data by reference with setRef(). * Setting data by reference tends to run a lot faster than copying data in. * + * This is a FAST version that assumes the caller has checked to make sure the sourceBuf + * is not null and elementNum is correctly adjusted for isRepeating. 
And, that the isNull entry + * has been set. Only the output entry fields will be set by this method. + * + * @param elementNum index within column vector to set + * @param sourceBuf container of source data + * @param start start byte position within source + * @param length length of source byte sequence + */ @@ -183,6 +194,10 @@ public void setVal(int elementNum, byte[] sourceBuf, int start, int length) { * DO NOT USE this method unless it's not practical to set data by reference with setRef(). * Setting data by reference tends to run a lot faster than copying data in. * + * This is a FAST version that assumes the caller has checked to make sure the sourceBuf + * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. Only the output entry fields will be set by this method. + * + * @param elementNum index within column vector to set + * @param sourceBuf container of source data */ @@ -309,46 +324,86 @@ public void increaseBufferSpace(int nextElemLength) { /** Copy the current object contents into the output. Only copy selected entries, * as indicated by selectedInUse and the sel array. */ + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, BytesColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + BytesColumnVector output = (BytesColumnVector) outputColVector; + boolean[] outputIsNull = output.isNull; + + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.setVal(0, vector[0], start[0], length[0]); - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + outputIsNull[0] = false; + output.setVal(0, vector[0], start[0], length[0]); + } else { + outputIsNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.setVal(i, vector[i], start[i], length[i]); - } - } - else { - for (int i = 0; i < size; i++) { - output.setVal(i, vector[i], start[i], length[i]); + if (noNulls) { + if (selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + output.setVal(i, vector[i], start[i], length[i]); + } + } else { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + output.setVal(i, vector[i], start[i], length[i]); + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != size; i++) { + output.setVal(i, vector[i], start[i], length[i]); + } } - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs...
- // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; - output.isNull[i] = isNull[i]; + if (!isNull[i]) { + output.isNull[i] = false; + output.setVal(i, vector[i], start[i], length[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } else { + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + output.isNull[i] = false; + output.setVal(i, vector[i], start[i], length[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } } - } - else { - System.arraycopy(isNull, 0, output.isNull, 0, size); } } } @@ -390,9 +445,9 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { // Fill the all the vector entries with provided value public void fill(byte[] value) { - noNulls = true; isRepeating = true; - setRef(0, value, 0, value.length); + isNull[0] = false; + setVal(0, value, 0, value.length); } // Fill the column vector with nulls @@ -403,18 +458,55 @@ public void fillWithNulls() { isNull[0] = true; } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - if (inputVector.isRepeating) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. + if (isRepeating && outputElementNum != 0) { + throw new AssertionError("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { inputElementNum = 0; } - if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { - isNull[outElementNum] = false; - BytesColumnVector in = (BytesColumnVector) inputVector; - setVal(outElementNum, in.vector[inputElementNum], + + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + BytesColumnVector in = (BytesColumnVector) inputColVector; + setVal(outputElementNum, in.vector[inputElementNum], in.start[inputElementNum], in.length[inputElementNum]); } else { - isNull[outElementNum] = true; + + // Only mark output NULL when input is NULL. 
+ isNull[outputElementNum] = true; noNulls = false; } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index bce0bd7..a498428 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -114,120 +114,148 @@ public void setRepeating(boolean isRepeating) { abstract public void flatten(boolean selectedInUse, int[] sel, int size); - // Simplify vector by brute-force flattening noNulls if isRepeating - // This can be used to reduce combinatorial explosion of code paths in VectorExpressions - // with many arguments. - protected void flattenRepeatingNulls(boolean selectedInUse, int[] sel, - int size) { + // Simplify vector by brute-force flattening noNulls if isRepeating + // This can be used to reduce combinatorial explosion of code paths in VectorExpressions + // with many arguments. + protected void flattenRepeatingNulls(boolean selectedInUse, int[] sel, + int size) { - boolean nullFillValue; + boolean nullFillValue; - if (noNulls) { - nullFillValue = false; - } else { - nullFillValue = isNull[0]; + if (noNulls) { + nullFillValue = false; + } else { + nullFillValue = isNull[0]; + } + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + isNull[i] = nullFillValue; } + } else { + Arrays.fill(isNull, 0, size, nullFillValue); + } + + // all nulls are now explicit + noNulls = false; + } + protected void flattenNoNulls(boolean selectedInUse, int[] sel, + int size) { + if (noNulls) { + noNulls = false; if (selectedInUse) { for (int j = 0; j < size; j++) { - int i = sel[j]; - isNull[i] = nullFillValue; + isNull[sel[j]] = false; } } else { - Arrays.fill(isNull, 0, size, nullFillValue); + Arrays.fill(isNull, 0, size, false); } - - // all nulls are now explicit - noNulls = false; } + } - protected void flattenNoNulls(boolean selectedInUse, int[] sel, - int size) { - if (noNulls) { - noNulls = false; - if (selectedInUse) { - for (int j = 0; j < size; j++) { - isNull[sel[j]] = false; - } - } else { - Arrays.fill(isNull, 0, size, false); - } - } - } + /** + * Restore the state of isRepeating and noNulls to what it was + * before flattening. This must only be called just after flattening + * and then evaluating a VectorExpression on the column vector. + * It is an optimization that allows other operations on the same + * column to continue to benefit from the isRepeating and noNulls + * indicators. + */ + public void unFlatten() { + isRepeating = preFlattenIsRepeating; + noNulls = preFlattenNoNulls; + } - /** - * Restore the state of isRepeating and noNulls to what it was - * before flattening. This must only be called just after flattening - * and then evaluating a VectorExpression on the column vector. - * It is an optimization that allows other operations on the same - * column to continue to benefit from the isRepeating and noNulls - * indicators. - */ - public void unFlatten() { - isRepeating = preFlattenIsRepeating; - noNulls = preFlattenNoNulls; - } + // Record repeating and no nulls state to be restored later. + protected void flattenPush() { + preFlattenIsRepeating = isRepeating; + preFlattenNoNulls = noNulls; + } - // Record repeating and no nulls state to be restored later. 
- protected void flattenPush() { - preFlattenIsRepeating = isRepeating; - preFlattenNoNulls = noNulls; - } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ + public abstract void setElement(int outputElementNum, int inputElementNum, + ColumnVector inputColVector); - /** - * Set the element in this column vector from the given input vector. - * This method can assume that the output does not have isRepeating set. - */ - public abstract void setElement(int outElementNum, int inputElementNum, - ColumnVector inputVector); - - /** - * Initialize the column vector. This method can be overridden by specific column vector types. - * Use this method only if the individual type of the column vector is not known, otherwise its - * preferable to call specific initialization methods. - */ - public void init() { - // Do nothing by default - } + /* + * Copy the current object contents into the output. Only copy selected entries + * as indicated by selectedInUse and the sel array. + */ + public abstract void copySelected( + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector); - /** - * Ensure the ColumnVector can hold at least size values. - * This method is deliberately *not* recursive because the complex types - * can easily have more (or less) children than the upper levels. - * @param size the new minimum size - * @param preserveData should the old data be preserved? - */ - public void ensureSize(int size, boolean preserveData) { - if (isNull.length < size) { - boolean[] oldArray = isNull; - isNull = new boolean[size]; - if (preserveData && !noNulls) { - if (isRepeating) { - isNull[0] = oldArray[0]; - } else { - System.arraycopy(oldArray, 0, isNull, 0, oldArray.length); - } + /** + * Initialize the column vector. This method can be overridden by specific column vector types. + * Use this method only if the individual type of the column vector is not known, otherwise its + * preferable to call specific initialization methods. + */ + public void init() { + // Do nothing by default + } + + /** + * Ensure the ColumnVector can hold at least size values. + * This method is deliberately *not* recursive because the complex types + * can easily have more (or less) children than the upper levels. + * @param size the new minimum size + * @param preserveData should the old data be preserved? 
+ */ + public void ensureSize(int size, boolean preserveData) { + if (isNull.length < size) { + boolean[] oldArray = isNull; + isNull = new boolean[size]; + if (preserveData && !noNulls) { + if (isRepeating) { + isNull[0] = oldArray[0]; + } else { + System.arraycopy(oldArray, 0, isNull, 0, oldArray.length); } } } + } - /** - * Print the value for this column into the given string builder. - * @param buffer the buffer to print into - * @param row the id of the row to print - */ - public abstract void stringifyValue(StringBuilder buffer, - int row); - - /** - * Shallow copy of the contents of this vector to the other vector; - * replaces other vector's values. - */ - public void shallowCopyTo(ColumnVector otherCv) { - otherCv.isNull = isNull; - otherCv.noNulls = noNulls; - otherCv.isRepeating = isRepeating; - otherCv.preFlattenIsRepeating = preFlattenIsRepeating; - otherCv.preFlattenNoNulls = preFlattenNoNulls; - } + /** + * Print the value for this column into the given string builder. + * @param buffer the buffer to print into + * @param row the id of the row to print + */ + public abstract void stringifyValue(StringBuilder buffer, + int row); + + /** + * Shallow copy of the contents of this vector to the other vector; + * replaces other vector's values. + */ + public void shallowCopyTo(ColumnVector otherCv) { + otherCv.isNull = isNull; + otherCv.noNulls = noNulls; + otherCv.isRepeating = isRepeating; + otherCv.preFlattenIsRepeating = preFlattenIsRepeating; + otherCv.preFlattenNoNulls = preFlattenNoNulls; } +} diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java index 37b0bf5..615eb6f 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java @@ -28,7 +28,7 @@ public short scale; public short precision; - private HiveDecimalWritable tempHiveDecWritable; + private HiveDecimalWritable scratchHiveDecWritable; public Decimal64ColumnVector(int precision, int scale) { this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale); @@ -38,30 +38,124 @@ public Decimal64ColumnVector(int size, int precision, int scale) { super(size); this.precision = (short) precision; this.scale = (short) scale; - tempHiveDecWritable = new HiveDecimalWritable(); + scratchHiveDecWritable = new HiveDecimalWritable(); } + /** + * Set a Decimal64 field from a HiveDecimalWritable. + * + * This is a FAST version that assumes the caller has checked to make sure the writable + * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. + * + * We will check for precision/scale range, so the entry's NULL may get set. + * Otherwise, only the output entry fields will be set by this method. 
+ * + * @param elementNum + * @param writable + */ public void set(int elementNum, HiveDecimalWritable writable) { - tempHiveDecWritable.set(writable); - tempHiveDecWritable.mutateEnforcePrecisionScale(precision, scale); - if (!tempHiveDecWritable.isSet()) { + scratchHiveDecWritable.set(writable); + scratchHiveDecWritable.mutateEnforcePrecisionScale(precision, scale); + if (!scratchHiveDecWritable.isSet()) { noNulls = false; isNull[elementNum] = true; } else { - isNull[elementNum] = false; - vector[elementNum] = tempHiveDecWritable.serialize64(scale); + vector[elementNum] = scratchHiveDecWritable.serialize64(scale); } } + /** + * Set a Decimal64 field from a HiveDecimal. + * + * This is a FAST version that assumes the caller has checked to make sure the hiveDec + * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. + * + * We will check for precision/scale range, so the entry's NULL may get set. + * Otherwise, only the output entry fields will be set by this method. + * + * @param elementNum + * @param hiveDec + */ public void set(int elementNum, HiveDecimal hiveDec) { - tempHiveDecWritable.set(hiveDec); - tempHiveDecWritable.mutateEnforcePrecisionScale(precision, scale); - if (!tempHiveDecWritable.isSet()) { + scratchHiveDecWritable.set(hiveDec); + scratchHiveDecWritable.mutateEnforcePrecisionScale(precision, scale); + if (!scratchHiveDecWritable.isSet()) { noNulls = false; isNull[elementNum] = true; } else { - isNull[elementNum] = false; - vector[elementNum] = tempHiveDecWritable.serialize64(scale); + vector[elementNum] = scratchHiveDecWritable.serialize64(scale); } } + + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ + @Override + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. + if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { + inputElementNum = 0; + } + + // Do NOTHING if output is NULL. 
+ if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + Decimal64ColumnVector decimal64ColVector = (Decimal64ColumnVector) inputColVector; + scratchHiveDecWritable.deserialize64( + decimal64ColVector.vector[inputElementNum], decimal64ColVector.scale); + scratchHiveDecWritable.mutateEnforcePrecisionScale(precision, scale); + if (scratchHiveDecWritable.isSet()) { + vector[inputElementNum] = scratchHiveDecWritable.serialize64(scale); + } else { + + // In effect, the input is NULL because of out-of-range precision/scale. + noNulls = false; + isNull[inputElementNum] = true; + } + } else { + + // Only mark output NULL when input is NULL. + isNull[outputElementNum] = true; + noNulls = false; + } + } + + /** + * Return a convenience writable object stored by this column vector. + * @return + */ + public HiveDecimalWritable getScratchWritable() { + return scratchHiveDecWritable; + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java index e41e19f..c1d6a3a 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -51,37 +53,74 @@ public DecimalColumnVector(int size, int precision, int scale) { // Fill the all the vector entries with provided value public void fill(HiveDecimal value) { - noNulls = true; isRepeating = true; + isNull[0] = false; if (vector[0] == null) { vector[0] = new HiveDecimalWritable(value); - } else { - vector[0].set(value); } + set(0, value); } @Override public void flatten(boolean selectedInUse, int[] sel, int size) { - // TODO Auto-generated method stub + throw new RuntimeException("Not implemented"); } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). 
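+   *
+   * For illustration (a sketch): if the caller has already done
+   *
+   *   output.noNulls = false;
+   *   output.isNull[i] = true;
+   *
+   * then a later output.setElement(i, j, input) returns immediately and leaves
+   * both vector[i] and isNull[i] untouched.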
+ * + */ @Override - public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - if (inputVector.isRepeating) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. + if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { inputElementNum = 0; } - if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { - vector[outElementNum].set( - ((DecimalColumnVector) inputVector).vector[inputElementNum], + + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + vector[outputElementNum].set( + ((DecimalColumnVector) inputColVector).vector[inputElementNum], precision, scale); - if (!vector[outElementNum].isSet()) { - isNull[outElementNum] = true; + if (!vector[outputElementNum].isSet()) { + + // In effect, the input is NULL because of out-of-range precision/scale. + isNull[outputElementNum] = true; noNulls = false; - } else { - isNull[outElementNum] = false; } } else { - isNull[outElementNum] = true; + + // Only mark output NULL when input is NULL. + isNull[outputElementNum] = true; noNulls = false; } } @@ -98,23 +137,45 @@ public void stringifyValue(StringBuilder buffer, int row) { } } - public void set(int elementNum, HiveDecimalWritable writeable) { - vector[elementNum].set(writeable, precision, scale); + /** + * Set a Decimal64 field from a HiveDecimalWritable. + * + * This is a FAST version that assumes the caller has checked to make sure the writable + * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. + * + * We will check for precision/scale range, so the entry's NULL may get set. + * Otherwise, only the output entry fields will be set by this method. + * + * @param elementNum + * @param writable + */ + public void set(int elementNum, HiveDecimalWritable writable) { + vector[elementNum].set(writable, precision, scale); if (!vector[elementNum].isSet()) { noNulls = false; isNull[elementNum] = true; - } else { - isNull[elementNum] = false; } } + /** + * Set a decimal from a HiveDecimal. + * + * This is a FAST version that assumes the caller has checked to make sure the hiveDec + * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. + * + * We will check for precision/scale range, so the entry's NULL may get set. + * Otherwise, only the output entry fields will be set by this method. + * + * @param elementNum + * @param hiveDec + */ public void set(int elementNum, HiveDecimal hiveDec) { vector[elementNum].set(hiveDec, precision, scale); if (!vector[elementNum].isSet()) { noNulls = false; isNull[elementNum] = true; - } else { - isNull[elementNum] = false; } } @@ -149,4 +210,91 @@ public void shallowCopyTo(ColumnVector otherCv) { other.precision = precision; other.vector = vector; } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + @Override + public void copySelected( + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + DecimalColumnVector output = (DecimalColumnVector) outputColVector; + boolean[] outputIsNull = output.isNull; + + // We do not need to do a column reset since we are carefully changing the output. 
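+    // (Each row written below gets its isNull entry and its value assigned
+    // explicitly, so stale output state cannot leak into rows [0, size).)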
+ output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + if (noNulls || !isNull[0]) { + outputIsNull[0] = false; + output.set(0, vector[0]); + } else { + outputIsNull[0] = true; + output.noNulls = false; + output.vector[0].setFromLong(0); + } + output.isRepeating = true; + return; + } + + // Handle normal case + + if (noNulls) { + if (selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + output.set(i, vector[i]); + } + } else { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + output.set(i, vector[i]); + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != size; i++) { + output.set(i, vector[i]); + } + } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + if (!isNull[i]) { + output.isNull[i] = false; + output.set(i, vector[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } else { + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + output.isNull[i] = false; + output.set(i, vector[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } + } + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index e04af01..f833bde 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -54,52 +54,88 @@ public DoubleColumnVector(int len) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + DoubleColumnVector output = (DoubleColumnVector) outputColVector; + boolean[] outputIsNull = output.isNull; - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.vector[0] = vector[0]; - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + outputIsNull[0] = false; + output.vector[0] = vector[0]; + } else { + outputIsNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.vector[i] = vector[i]; + if (noNulls) { + if (selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + output.vector[i] = vector[i]; + } + } else { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + output.vector[i] = vector[i]; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + System.arraycopy(vector, 0, output.vector, 0, size); } - } - else { - System.arraycopy(vector, 0, output.vector, 0, size); - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.vector[i] = vector[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + for (int i = 0; i < size; i++) { + output.vector[i] = vector[i]; + } } } } // Fill the column vector with the provided value public void fill(double value) { - noNulls = true; isRepeating = true; + isNull[0] = false; vector[0] = value; } @@ -132,17 +168,54 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { flattenNoNulls(selectedInUse, sel, size); } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - if (inputVector.isRepeating) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. + if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { inputElementNum = 0; } - if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { - isNull[outElementNum] = false; - vector[outElementNum] = - ((DoubleColumnVector) inputVector).vector[inputElementNum]; + + // Do NOTHING if output is NULL. 
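+    // (See the method comment: the output isNull entry must be set in ADVANCE,
+    // so a row already marked NULL is simply skipped here.)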
+ if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + vector[outputElementNum] = + ((DoubleColumnVector) inputColVector).vector[inputElementNum]; } else { - isNull[outElementNum] = true; + + // Only mark output NULL when input is NULL. + isNull[outputElementNum] = true; noNulls = false; } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java index f813b1b..9324bc0 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java @@ -195,13 +195,57 @@ public int compareTo(IntervalDayTimeColumnVector intervalDayTimeColVector1, int asScratchIntervalDayTime(elementNum2)); } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. + if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { + inputElementNum = 0; + } + + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } - IntervalDayTimeColumnVector timestampColVector = (IntervalDayTimeColumnVector) inputVector; + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + IntervalDayTimeColumnVector timestampColVector = (IntervalDayTimeColumnVector) inputColVector; + totalSeconds[outputElementNum] = timestampColVector.totalSeconds[inputElementNum]; + nanos[outputElementNum] = timestampColVector.nanos[inputElementNum]; + } else { - totalSeconds[outElementNum] = timestampColVector.totalSeconds[inputElementNum]; - nanos[outElementNum] = timestampColVector.nanos[inputElementNum]; + // Only mark output NULL when input is NULL. 
+ isNull[outputElementNum] = true; + noNulls = false; + } } // Simplify vector by brute-force flattening noNulls and isRepeating @@ -229,8 +273,12 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { } /** - * Set a row from a HiveIntervalDayTime. - * We assume the entry has already been isRepeated adjusted. + * Set a field from a HiveIntervalDayTime. + * + * This is a FAST version that assumes the caller has checked to make sure the sourceBuf + * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. Only the output entry fields will be set by this method. + * * @param elementNum * @param intervalDayTime */ @@ -240,7 +288,12 @@ public void set(int elementNum, HiveIntervalDayTime intervalDayTime) { } /** - * Set a row from the current value in the scratch interval day time. + * Set a field from the current value in the scratch interval day time. + * + * This is a FAST version that assumes the caller has checked to make sure the scratch interval + * day time is valid and elementNum is correctly adjusted for isRepeating. And, that the isNull + * entry has been set. Only the output entry fields will be set by this method. + * * @param elementNum */ public void setFromScratchIntervalDayTime(int elementNum) { @@ -260,47 +313,86 @@ public void setNullValue(int elementNum) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, IntervalDayTimeColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + IntervalDayTimeColumnVector output = (IntervalDayTimeColumnVector) outputColVector; + boolean[] outputIsNull = output.isNull; + + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.totalSeconds[0] = totalSeconds[0]; - output.nanos[0] = nanos[0]; - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + outputIsNull[0] = false; + output.totalSeconds[0] = totalSeconds[0]; + output.nanos[0] = nanos[0]; + } else { + outputIsNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.totalSeconds[i] = totalSeconds[i]; - output.nanos[i] = nanos[i]; + if (noNulls) { + if (selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + output.totalSeconds[i] = totalSeconds[i]; + output.nanos[i] = nanos[i]; + } + } else { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + output.totalSeconds[i] = totalSeconds[i]; + output.nanos[i] = nanos[i]; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
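+        // (The fill costs O(capacity) rather than O(size), but it lets the copy
+        // below run without any per-row isNull writes.)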
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != size; i++) { + output.totalSeconds[i] = totalSeconds[i]; + output.nanos[i] = nanos[i]; + } } - } - else { - System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size); - System.arraycopy(nanos, 0, output.nanos, 0, size); - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.totalSeconds[i] = totalSeconds[i]; + output.nanos[i] = nanos[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); } } } @@ -310,8 +402,8 @@ public void copySelected( * @param intervalDayTime */ public void fill(HiveIntervalDayTime intervalDayTime) { - noNulls = true; isRepeating = true; + isNull[0] = false; totalSeconds[0] = intervalDayTime.getTotalSeconds(); nanos[0] = intervalDayTime.getNanos(); } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java index 7ecb1e0..8cbcc02 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java @@ -49,28 +49,67 @@ protected void childFlatten(boolean useSelected, int[] selected, int size) { child.flatten(useSelected, selected, size); } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, - ColumnVector inputVector) { - ListColumnVector input = (ListColumnVector) inputVector; - if (input.isRepeating) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. 
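+    // A repeating output vector only has entry 0 in play; a write to any other
+    // index would be silently lost, so treat it as a caller bug.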
+ if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { inputElementNum = 0; } - if (!input.noNulls && input.isNull[inputElementNum]) { - isNull[outElementNum] = true; - noNulls = false; - } else { - isNull[outElementNum] = false; + + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } + + // CONCERN: isRepeating + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + ListColumnVector input = (ListColumnVector) inputColVector; int offset = childCount; int length = (int) input.lengths[inputElementNum]; int inputOffset = (int) input.offsets[inputElementNum]; - offsets[outElementNum] = offset; + offsets[outputElementNum] = offset; childCount += length; - lengths[outElementNum] = length; + lengths[outputElementNum] = length; child.ensureSize(childCount, true); for (int i = 0; i < length; ++i) { - child.setElement(i + offset, inputOffset + i, input.child); + final int outputIndex = i + offset; + child.isNull[outputIndex] = false; + child.setElement(outputIndex, inputOffset + i, input.child); } + } else { + + // Only mark output NULL when input is NULL. + isNull[outputElementNum] = true; + noNulls = false; } } @@ -116,4 +155,10 @@ public void unFlatten() { } } + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } + } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index 49e9184..443a076 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -54,44 +54,78 @@ public LongColumnVector(int len) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, LongColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + LongColumnVector output = (LongColumnVector) outputColVector; + boolean[] outputIsNull = output.isNull; - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.vector[0] = vector[0]; - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + outputIsNull[0] = false; + output.vector[0] = vector[0]; + } else { + outputIsNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.vector[i] = vector[i]; + if (noNulls) { + if (selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + output.vector[i] = vector[i]; + } + } else { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + output.vector[i] = vector[i];; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + System.arraycopy(vector, 0, output.vector, 0, size); } - } - else { - System.arraycopy(vector, 0, output.vector, 0, size); - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.vector[i] = vector[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + System.arraycopy(vector, 0, output.vector, 0, size); } } } @@ -101,51 +135,81 @@ public void copySelected( public void copySelected( boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + boolean[] outputIsNull = output.isNull; + + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.vector[0] = vector[0]; // automatic conversion to double is done here - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + outputIsNull[0] = false; + output.vector[0] = vector[0]; // automatic conversion to double is done here + } else { + outputIsNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.vector[i] = vector[i]; - } - } - else { - for(int i = 0; i < size; ++i) { - output.vector[i] = vector[i]; + if (noNulls) { + if (selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!output.noNulls) { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + output.vector[i] = vector[i]; + } + } else { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + output.vector[i] = vector[i];; + } + } + } else { + if (!output.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + output.noNulls = true; + } + System.arraycopy(vector, 0, output.vector, 0, size); } - } + } else /* there are NULLs in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
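+     *
+     * Copying the payload even for NULL rows is harmless here: by convention the
+     * data slot of a NULL entry is undefined, and readers consult isNull first.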
+ */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.vector[i] = vector[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + System.arraycopy(vector, 0, output.vector, 0, size); } } } // Fill the column vector with the provided value public void fill(long value) { - noNulls = true; isRepeating = true; + isNull[0] = false; vector[0] = value; } @@ -178,17 +242,52 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { flattenNoNulls(selectedInUse, sel, size); } + /** + * Set the element in this column vector from the given input vector. + * + * Both the inputElementNum and outElementNum must have been adjusted to 0 in ADVANCE + * when the input / output has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - if (inputVector.isRepeating) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. + if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { inputElementNum = 0; } - if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { - isNull[outElementNum] = false; - vector[outElementNum] = - ((LongColumnVector) inputVector).vector[inputElementNum]; + + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + vector[outputElementNum] = + ((LongColumnVector) inputColVector).vector[inputElementNum]; } else { - isNull[outElementNum] = true; + + // Only mark output NULL when input is NULL. + isNull[outputElementNum] = true; noNulls = false; } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java index 078c9c1..3143a44 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java @@ -53,30 +53,71 @@ protected void childFlatten(boolean useSelected, int[] selected, int size) { values.flatten(useSelected, selected, size); } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. 
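+   * (A repeating input keeps its single offsets/lengths entry at index 0.)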
+ * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, - ColumnVector inputVector) { - if (inputVector.isRepeating) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. + if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { inputElementNum = 0; } - if (!inputVector.noNulls && inputVector.isNull[inputElementNum]) { - isNull[outElementNum] = true; - noNulls = false; - } else { - MapColumnVector input = (MapColumnVector) inputVector; - isNull[outElementNum] = false; + + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + MapColumnVector input = (MapColumnVector) inputColVector; + isNull[outputElementNum] = false; int offset = childCount; int length = (int) input.lengths[inputElementNum]; int inputOffset = (int) input.offsets[inputElementNum]; - offsets[outElementNum] = offset; + offsets[outputElementNum] = offset; childCount += length; - lengths[outElementNum] = length; + lengths[outputElementNum] = length; keys.ensureSize(childCount, true); values.ensureSize(childCount, true); for (int i = 0; i < length; ++i) { - keys.setElement(i + offset, inputOffset + i, input.keys); - values.setElement(i + offset, inputOffset + i, input.values); + final int inputIndex = inputOffset + i; + final int outputIndex = i + offset; + keys.isNull[outputIndex] = false; + keys.setElement(outputIndex, inputIndex, input.keys); + values.isNull[outputIndex] = false; + values.setElement(outputIndex, inputIndex, input.values); } + } else { + + // Only mark output NULL when input is NULL. 
+ isNull[outputElementNum] = true; + noNulls = false; } } @@ -128,4 +169,10 @@ public void unFlatten() { values.unFlatten(); } } + + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java index b65c802..70d6ab4 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java @@ -53,21 +53,60 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { flattenNoNulls(selectedInUse, sel, size); } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, - ColumnVector inputVector) { - if (inputVector.isRepeating) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. + if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { inputElementNum = 0; } - if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { - isNull[outElementNum] = false; - ColumnVector[] inputFields = ((StructColumnVector) inputVector).fields; + + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + ColumnVector[] inputFields = ((StructColumnVector) inputColVector).fields; for (int i = 0; i < inputFields.length; ++i) { - fields[i].setElement(outElementNum, inputElementNum, inputFields[i]); + ColumnVector inputField = inputFields[i]; + ColumnVector outputField = fields[i]; + outputField.isNull[outputElementNum] = false; + outputField.setElement(outputElementNum, inputElementNum, inputField); } } else { + + // Only mark output NULL when input is NULL. 
+ isNull[outputElementNum] = true; noNulls = false; - isNull[outElementNum] = true; } } @@ -134,4 +173,10 @@ public void setRepeating(boolean isRepeating) { public void shallowCopyTo(ColumnVector otherCv) { throw new UnsupportedOperationException(); // Implement if needed. } + + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java index 0e7f86f..a6f5369 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java @@ -241,13 +241,57 @@ public int compareTo(TimestampColumnVector timestampColVector1, int elementNum1, asScratchTimestamp(elementNum2)); } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { - TimestampColumnVector timestampColVector = (TimestampColumnVector) inputVector; + // Invariants. + if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { + inputElementNum = 0; + } - time[outElementNum] = timestampColVector.time[inputElementNum]; - nanos[outElementNum] = timestampColVector.nanos[inputElementNum]; + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVector; + time[outputElementNum] = timestampColVector.time[inputElementNum]; + nanos[outputElementNum] = timestampColVector.nanos[inputElementNum]; + } else { + + // Only mark output NULL when input is NULL. 
+ isNull[outputElementNum] = true; + noNulls = false; + } } // Simplify vector by brute-force flattening noNulls and isRepeating @@ -275,23 +319,35 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { } /** - * Set a row from a timestamp. - * We assume the entry has already been isRepeated adjusted. + * Set a field from a Timestamp. + * + * This is a FAST version that assumes the caller has checked to make sure elementNum + * is correctly adjusted for isRepeating. And, that the isNull entry + * has been set. Only the output entry fields will be set by this method. + * + * For backward compatibility, this method does check if the timestamp is null and set the + * isNull entry appropriately. + * * @param elementNum * @param timestamp */ public void set(int elementNum, Timestamp timestamp) { if (timestamp == null) { - this.noNulls = false; - this.isNull[elementNum] = true; - } else { - this.time[elementNum] = timestamp.getTime(); - this.nanos[elementNum] = timestamp.getNanos(); + isNull[elementNum] = true; + noNulls = false; + return; } + this.time[elementNum] = timestamp.getTime(); + this.nanos[elementNum] = timestamp.getNanos(); } /** - * Set a row from the current value in the scratch timestamp. + * Set a field from the current value in the scratch timestamp. + * + * This is a FAST version that assumes the caller has checked to make sure the current value in + * the scratch timestamp is valid and elementNum is correctly adjusted for isRepeating. And, + * that the isNull entry has been set. Only the output entry fields will be set by this method. + * * @param elementNum */ public void setFromScratchTimestamp(int elementNum) { @@ -311,47 +367,84 @@ public void setNullValue(int elementNum) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, TimestampColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + TimestampColumnVector output = (TimestampColumnVector) outputColVector; + boolean[] outputIsNull = output.isNull; + + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.time[0] = time[0]; - output.nanos[0] = nanos[0]; - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + outputIsNull[0] = false; + output.time[0] = time[0]; + output.nanos[0] = nanos[0]; + } else { + outputIsNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.time[i] = time[i]; - output.nanos[i] = nanos[i]; + if (noNulls) { + if (selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + output.time[i] = time[i]; + output.nanos[i] = nanos[i]; + } + } else { + for(int j = 0; j != size; j++) { + final int i = sel[j]; + output.time[i] = time[i]; + output.nanos[i] = nanos[i]; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + System.arraycopy(time, 0, output.time, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); } - } - else { - System.arraycopy(time, 0, output.time, 0, size); - System.arraycopy(nanos, 0, output.nanos, 0, size); - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.time[i] = time[i]; + output.nanos[i] = nanos[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + System.arraycopy(time, 0, output.time, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); } } } @@ -361,8 +454,8 @@ public void copySelected( * @param timestamp */ public void fill(Timestamp timestamp) { - noNulls = true; isRepeating = true; + isNull[0] = false; time[0] = timestamp.getTime(); nanos[0] = timestamp.getNanos(); } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java index 448461b..09d519d 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java @@ -55,21 +55,61 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { flattenNoNulls(selectedInUse, sel, size); } + /** + * Set the element in this column vector from the given input vector. + * + * The inputElementNum will be adjusted to 0 if the input column has isRepeating set. + * + * On the other hand, the outElementNum must have been adjusted to 0 in ADVANCE when the output + * has isRepeating set. + * + * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This + * supports the caller to do output NULL processing in advance that may cause the output results + * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE. + * + * The inputColVector noNulls and isNull entry will be examined. The output will only + * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where + * inputElementNum may have been adjusted to 0 for isRepeating. + * + * If the input entry is NULL or out-of-range, the output will be marked as NULL. + * I.e. set output noNull = false and isNull[outElementNum] = true. An example of out-of-range + * is the DecimalColumnVector which can find the input decimal does not fit in the output + * precision/scale. + * + * (Since we return immediately if the output entry is NULL, we have no need and do not mark + * the output entry to NOT NULL). + * + */ @Override - public void setElement(int outElementNum, int inputElementNum, - ColumnVector inputVector) { - if (inputVector.isRepeating) { + public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) { + + // Invariants. 
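+    // Same contract as the scalar column vectors: a repeating output is written
+    // only at entry 0, and the input index collapses to 0 when the input repeats.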
+ if (isRepeating && outputElementNum != 0) { + throw new RuntimeException("Output column number expected to be 0 when isRepeating"); + } + if (inputColVector.isRepeating) { inputElementNum = 0; } - if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { - isNull[outElementNum] = false; - UnionColumnVector input = (UnionColumnVector) inputVector; - tags[outElementNum] = input.tags[inputElementNum]; - fields[tags[outElementNum]].setElement(outElementNum, inputElementNum, - input.fields[tags[outElementNum]]); + + // Do NOTHING if output is NULL. + if (!noNulls && isNull[outputElementNum]) { + return; + } + + if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) { + UnionColumnVector input = (UnionColumnVector) inputColVector; + final int tag = input.tags[inputElementNum]; + tags[outputElementNum] = tag; + ColumnVector inputField = input.fields[tag]; + ColumnVector outputField = fields[tag]; + outputField.isNull[outputElementNum] = false; + outputField.setElement( + outputElementNum, inputElementNum, inputField); } else { + + // Only mark output NULL when input is NULL. + isNull[outputElementNum] = true; noNulls = false; - isNull[outElementNum] = true; } } @@ -142,4 +182,10 @@ public void setRepeating(boolean isRepeating) { public void shallowCopyTo(ColumnVector otherCv) { throw new UnsupportedOperationException(); // Implement if needed. } + + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index 42c7e8f..ea13c24 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -136,6 +136,50 @@ private static String toUTF8(Object o) { return o.toString(); } + public String stringifyColumn(int columnNum) { + if (size == 0) { + return ""; + } + StringBuilder b = new StringBuilder(); + b.append("columnNum "); + b.append(columnNum); + b.append(", size "); + b.append(size); + b.append(", selectedInUse "); + b.append(selectedInUse); + ColumnVector colVector = cols[columnNum]; + b.append(", noNulls "); + b.append(colVector.noNulls); + b.append(", isRepeating "); + b.append(colVector.isRepeating); + b.append('\n'); + + final boolean noNulls = colVector.noNulls; + final boolean[] isNull = colVector.isNull; + if (colVector.isRepeating) { + final boolean hasRepeatedValue = (noNulls || !isNull[0]); + for (int i = 0; i < size; i++) { + if (hasRepeatedValue) { + colVector.stringifyValue(b, 0); + } else { + b.append("NULL"); + } + b.append('\n'); + } + } else { + for (int i = 0; i < size; i++) { + final int batchIndex = (selectedInUse ? 
selected[i] : i); + if (noNulls || !isNull[batchIndex]) { + colVector.stringifyValue(b, batchIndex); + } else { + b.append("NULL"); + } + b.append('\n'); + } + } + return b.toString(); + } + @Override public String toString() { if (size == 0) { diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestListColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestListColumnVector.java index eff0745..46305fa 100644 --- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestListColumnVector.java +++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestListColumnVector.java @@ -157,6 +157,7 @@ public void testSet() throws Exception { output.childCount = 30; // copy a null + output.isNull[3] = false; output.setElement(3, 6, input); assertEquals(30, output.childCount); StringBuilder buf = new StringBuilder(); @@ -164,6 +165,7 @@ public void testSet() throws Exception { assertEquals("null", buf.toString()); // copy a value + output.isNull[3] = false; output.setElement(3, 5, input); assertEquals(30, output.offsets[3]); assertEquals(2, output.lengths[3]); @@ -173,6 +175,7 @@ public void testSet() throws Exception { assertEquals("[50, 60]", buf.toString()); // overwrite a value + output.isNull[3] = false; output.setElement(3, 4, input); assertEquals(34, output.childCount); assertEquals(34, output1.vector.length); diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestMapColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestMapColumnVector.java index f98c51f..dd29f28 100644 --- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestMapColumnVector.java +++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestMapColumnVector.java @@ -170,6 +170,7 @@ public void testSet() throws Exception { output.childCount = 30; // copy a null + output.isNull[3] = false; output.setElement(3, 6, input); assertEquals(30, output.childCount); StringBuilder buf = new StringBuilder(); @@ -177,6 +178,7 @@ public void testSet() throws Exception { assertEquals("null", buf.toString()); // copy a value + output.isNull[3] = false; output.setElement(3, 5, input); assertEquals(30, output.offsets[3]); assertEquals(2, output.lengths[3]); @@ -187,6 +189,7 @@ public void testSet() throws Exception { " {\"key\": 60, \"value\": 600.0}]", buf.toString()); // overwrite a value + output.isNull[3] = false; output.setElement(3, 4, input); assertEquals(34, output.childCount); assertEquals(34, output1.vector.length); diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java index 9ae248c..6ffd6d1 100644 --- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java +++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java @@ -80,14 +80,17 @@ public void testSet() throws Exception { input1.vector[i] = i + 1; input2.vector[i] = i + 2; } + output.isNull[3] = false; output.setElement(3, 6, input); StringBuilder buf = new StringBuilder(); output.stringifyValue(buf, 3); assertEquals("null", buf.toString()); + output.isNull[3] = false; output.setElement(3, 5, input); buf = new StringBuilder(); output.stringifyValue(buf, 3); assertEquals("[1, null]", buf.toString()); + output.isNull[3] = false; output.setElement(3, 4, input); buf = new StringBuilder(); output.stringifyValue(buf, 3); diff --git 
vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index ffd5582..fbb89a9 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -3101,6 +3101,11 @@ private void generateDTIScalarCompareColumn(String[] tdesc) throws Exception { templateString = templateString.replaceAll("", className); templateString = templateString.replaceAll("", baseClassName); templateString = templateString.replaceAll("", operandType); + + String vectorExpressionParametersBody = getDTIScalarColumnDisplayBody(operandType); + templateString = templateString.replaceAll( + "", vectorExpressionParametersBody); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } @@ -3121,6 +3126,30 @@ private void generateFilterDTIScalarCompareColumn(String[] tdesc) throws Excepti className, templateString); } + private String getDTIScalarColumnDisplayBody(String type) { + if (type.equals("date")) { + return + "Date dt = new Date(0);" + + " dt.setTime(DateWritable.daysToMillis((int) value));\n" + + " return \"date \" + dt.toString() + \", \" + getColumnParamString(0, colNum);"; + } else { + return + " return super.vectorExpressionParameters();"; + } + } + + private String getDTIColumnScalarDisplayBody(String type) { + if (type.equals("date")) { + return + "Date dt = new Date(0);" + + " dt.setTime(DateWritable.daysToMillis((int) value));\n" + + " return getColumnParamString(0, colNum) + \", date \" + dt.toString();"; + } else { + return + " return super.vectorExpressionParameters();"; + } + } + private void generateDTIColumnCompareScalar(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType = tdesc[2]; @@ -3133,6 +3162,11 @@ private void generateDTIColumnCompareScalar(String[] tdesc) throws Exception { templateString = templateString.replaceAll("", className); templateString = templateString.replaceAll("", baseClassName); templateString = templateString.replaceAll("", operandType); + + String vectorExpressionParametersBody = getDTIColumnScalarDisplayBody(operandType); + templateString = templateString.replaceAll( + "", vectorExpressionParametersBody); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); }
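
For the "date" operand type, the substitution above effectively generates a
vectorExpressionParameters() override along these lines (a sketch reassembled
from the string literals in getDTIScalarColumnDisplayBody(); the enclosing
generated class, its value and colNum fields, and the Date/DateWritable imports
come from the templates and are assumed here):

    @Override
    public String vectorExpressionParameters() {
      // The scalar is stored as an epoch-day count; convert it to millis for display.
      Date dt = new Date(0);
      dt.setTime(DateWritable.daysToMillis((int) value));
      return "date " + dt.toString() + ", " + getColumnParamString(0, colNum);
    }

For every other date/time/interval operand type, the generated body simply
delegates to super.vectorExpressionParameters().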